From 9fa6c04c956dc05dd25e94845ccaf3d49ccf1013 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Wed, 18 Jun 2025 09:16:31 +0200 Subject: [PATCH 01/31] Split push for blocking and non-blocking --- README.md | 8 +++---- mergin/cli.py | 29 ++++++++++++------------- mergin/client.py | 11 +++++----- mergin/client_push.py | 43 ++++++++++++++++++++++++++++++++++++-- mergin/common.py | 2 +- mergin/test/test_client.py | 2 +- 6 files changed, 68 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index d0e9844..bb31546 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ Commands: share Fetch permissions to project share-add Add permissions to [users] to project share-remove Remove [users] permissions from project - show-file-changeset Displays information about project changes. - show-file-history Displays information about a single version of a... - show-version Displays information about a single version of a... + show-file-changeset Display information about project changes. + show-file-history Display information about a single version of a... + show-version Display information about a single version of a... status Show all changes in project files - upstream and... ``` @@ -99,7 +99,7 @@ To download a specific version of a project: $ mergin --username john download --version v42 john/project1 ~/mergin/project1 ``` -To download a sepecific version of a single file: +To download a specific version of a single file: 1. 
First you need to download the project: ``` diff --git a/mergin/cli.py b/mergin/cli.py index e479b88..b30e6c2 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -412,17 +412,18 @@ def push(ctx): return directory = os.getcwd() try: - job = push_project_async(mc, directory) - if job is not None: # if job is none, we don't upload any files, and the transaction is finished already - with click.progressbar(length=job.total_size) as bar: - last_transferred_size = 0 - while push_project_is_running(job): - time.sleep(1 / 10) # 100ms - new_transferred_size = job.transferred_size - bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only - last_transferred_size = new_transferred_size - push_project_finalize(job) - click.echo("Done") + blocking_job, non_blocking_job = push_project_async(mc, directory) + for job in [blocking_job, non_blocking_job]: + if job is not None: # if job is none, we don't upload any files, and the transaction is finished already + with click.progressbar(length=job.total_size) as bar: + last_transferred_size = 0 + while push_project_is_running(job): + time.sleep(1 / 10) # 100ms + new_transferred_size = job.transferred_size + bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only + last_transferred_size = new_transferred_size + push_project_finalize(job) + click.echo("Done") except InvalidProject as e: click.secho("Invalid project directory ({})".format(str(e)), fg="red") except ClientError as e: @@ -473,7 +474,7 @@ def pull(ctx): @click.argument("version") @click.pass_context def show_version(ctx, version): - """Displays information about a single version of a project. `version` is 'v1', 'v2', etc.""" + """Display information about a single version of a project. 
`version` is 'v1', 'v2', etc.""" mc = ctx.obj["client"] if mc is None: return @@ -492,7 +493,7 @@ def show_version(ctx, version): @click.argument("path") @click.pass_context def show_file_history(ctx, path): - """Displays information about a single version of a project.""" + """Display information about a single version of a project.""" mc = ctx.obj["client"] if mc is None: return @@ -516,7 +517,7 @@ def show_file_history(ctx, path): @click.argument("version") @click.pass_context def show_file_changeset(ctx, path, version): - """Displays information about project changes.""" + """Display information about project changes.""" mc = ctx.obj["client"] if mc is None: return diff --git a/mergin/client.py b/mergin/client.py index f7ff4de..4be45d9 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -876,11 +876,12 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - job = push_project_async(self, directory) - if job is None: - return # there is nothing to push (or we only deleted some files) - push_project_wait(job) - push_project_finalize(job) + blocking_job, non_blocking_job = push_project_async(self, directory) + for job in [blocking_job, non_blocking_job]: + if job is None: + return # there is nothing to push (or we only deleted some files) + push_project_wait(job) + push_project_finalize(job) def pull_project(self, directory): """ diff --git a/mergin/client_push.py b/mergin/client_push.py index 885db9a..48f3c9b 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -19,6 +19,7 @@ from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject from .editor import filter_changes +from .utils import is_qgis_file, is_versioned_file class UploadJob: @@ -122,6 +123,23 @@ def push_project_async(mc, directory): changes = mp.get_push_changes() changes = filter_changes(mc, project_info, changes) + + blocking_changes, non_blocking_changes = split_changes(changes) + + blocking_job = ( 
+ _prepare_upload_job(mp, mc, project_path, local_version, blocking_changes) + if any(len(v) for v in blocking_changes.values()) + else None + ) + non_blocking_job = ( + _prepare_upload_job(mp, mc, project_path, local_version, non_blocking_changes) + if any(len(v) for v in non_blocking_changes.values()) + else None + ) + + return blocking_job, non_blocking_job + +def _prepare_upload_job(mp, mc, project_path, local_version, changes): mp.log.debug("push changes:\n" + pprint.pformat(changes)) tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") @@ -206,10 +224,8 @@ def push_project_async(mc, directory): for item in upload_queue_items: future = job.executor.submit(_do_upload, item, job) job.futures.append(future) - return job - def push_project_wait(job): """blocks until all upload tasks are finished""" @@ -334,3 +350,26 @@ def remove_diff_files(job) -> None: diff_file = job.mp.fpath_meta(change["diff"]["path"]) if os.path.exists(diff_file): os.remove(diff_file) + + +def split_changes(changes): + """Split changes into blocking and non-blocking. 
+ + Blocking criteria: + - any updated files + - any removed files + - added files that are .gpkg or .qgz (.ggs) + """ + blocking = non_blocking = {"added": [], "updated": [], "removed": [], "renamed": []} + + blocking["updated"] = changes["updated"] + blocking["removed"] = changes["removed"] + blocking["renamed"] = changes["renamed"] + + for f in changes["added"]: + if is_qgis_file(f["path"]) or is_versioned_file(f["path"]): + blocking["added"].append(f) + else: + non_blocking["added"].append(f) + + return blocking, non_blocking diff --git a/mergin/common.py b/mergin/common.py index 9e65a6c..47200bf 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -1,7 +1,7 @@ import os from enum import Enum -CHUNK_SIZE = 100 * 1024 * 1024 +CHUNK_SIZE = 10 * 1024 * 1024 # there is an upper limit for chunk size on server, ideally should be requested from there once implemented UPLOAD_CHUNK_SIZE = 10 * 1024 * 1024 diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 31de795..40790d6 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -393,7 +393,7 @@ def test_cancel_push(mc): assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) # start pushing and then cancel the job - job = push_project_async(mc, project_dir) + job, _ = push_project_async(mc, project_dir) push_project_cancel(job) # if cancelled properly, we should be now able to do the push without any problem From dd03a38d3b652959d8f6d76dd0c4ccb4d5322723 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 20 Jun 2025 13:46:56 +0200 Subject: [PATCH 02/31] Introduce UploadChanges class and split method --- mergin/client_push.py | 60 ++++++++++++++++++++++++----------------- mergin/common.py | 2 +- mergin/editor.py | 7 ++--- mergin/merginproject.py | 36 +++++++++++-------------- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index 48f3c9b..7e69202 100644 --- a/mergin/client_push.py +++ 
b/mergin/client_push.py @@ -83,6 +83,38 @@ def upload_blocking(self, mc, mp): raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id)) +class UploadChanges: + def __init__(self, added=None, updated=None, removed=None): + self.added = added or [] + self.updated = updated or [] + self.removed = removed or [] + self.renamed = [] + + def is_empty(self): + return not (self.added or self.updated or self.removed or self.renamed) + + def split(self): + blocking = UploadChanges() + non_blocking = UploadChanges() + + for file in self.added: + target = blocking if is_qgis_file(file["path"]) or is_versioned_file(file["path"]) else non_blocking + target.added.append(file) + + for file in self.updated: + blocking.updated.append(file) + + for file in self.removed: + blocking.removed.append(file) + + result = {} + if not blocking.is_empty(): + result["blocking"] = blocking + if not non_blocking.is_empty(): + result["non_blocking"] = non_blocking + return result + + def push_project_async(mc, directory): """Starts push of a project and returns pending upload job""" @@ -124,7 +156,7 @@ def push_project_async(mc, directory): changes = mp.get_push_changes() changes = filter_changes(mc, project_info, changes) - blocking_changes, non_blocking_changes = split_changes(changes) + blocking_changes, non_blocking_changes = changes.split() blocking_job = ( _prepare_upload_job(mp, mc, project_path, local_version, blocking_changes) @@ -132,13 +164,14 @@ def push_project_async(mc, directory): else None ) non_blocking_job = ( - _prepare_upload_job(mp, mc, project_path, local_version, non_blocking_changes) + _prepare_upload_job(mp, mc, project_path, local_version, non_blocking_changes) if any(len(v) for v in non_blocking_changes.values()) else None ) return blocking_job, non_blocking_job + def _prepare_upload_job(mp, mc, project_path, local_version, changes): mp.log.debug("push changes:\n" + pprint.pformat(changes)) @@ -226,6 +259,7 @@ def 
_prepare_upload_job(mp, mc, project_path, local_version, changes): job.futures.append(future) return job + def push_project_wait(job): """blocks until all upload tasks are finished""" @@ -351,25 +385,3 @@ def remove_diff_files(job) -> None: if os.path.exists(diff_file): os.remove(diff_file) - -def split_changes(changes): - """Split changes into blocking and non-blocking. - - Blocking criteria: - - any updated files - - any removed files - - added files that are .gpkg or .qgz (.ggs) - """ - blocking = non_blocking = {"added": [], "updated": [], "removed": [], "renamed": []} - - blocking["updated"] = changes["updated"] - blocking["removed"] = changes["removed"] - blocking["renamed"] = changes["renamed"] - - for f in changes["added"]: - if is_qgis_file(f["path"]) or is_versioned_file(f["path"]): - blocking["added"].append(f) - else: - non_blocking["added"].append(f) - - return blocking, non_blocking diff --git a/mergin/common.py b/mergin/common.py index 47200bf..9e65a6c 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -1,7 +1,7 @@ import os from enum import Enum -CHUNK_SIZE = 10 * 1024 * 1024 +CHUNK_SIZE = 100 * 1024 * 1024 # there is an upper limit for chunk size on server, ideally should be requested from there once implemented UPLOAD_CHUNK_SIZE = 10 * 1024 * 1024 diff --git a/mergin/editor.py b/mergin/editor.py index 237b0ea..3797f48 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -1,6 +1,7 @@ from itertools import filterfalse from typing import Callable, Dict, List +from .client_push import UploadChanges from .utils import is_mergin_config, is_qgis_file, is_versioned_file EDITOR_ROLE_NAME = "editor" @@ -24,7 +25,7 @@ def is_editor_enabled(mc, project_info: dict) -> bool: return server_support and project_role == EDITOR_ROLE_NAME -def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: +def _apply_editor_filters(changes: UploadChanges) -> UploadChanges: """ Applies editor-specific filters to the changes dictionary, 
removing any changes to files that are not in the editor's list of allowed files. @@ -36,11 +37,11 @@ def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict """ updated = changes.get("updated", []) - changes["updated"] = list(filterfalse(_disallowed_changes, updated)) + changes.updated = list(filterfalse(_disallowed_changes, updated)) return changes -def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: +def filter_changes(mc, project_info: dict, changes: UploadChanges) -> UploadChanges: """ Filters the given changes dictionary based on the editor's enabled state. diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 83d8510..209e72e 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -9,6 +9,7 @@ from datetime import datetime from dateutil.tz import tzlocal +from .client_push import UploadChanges from .editor import prevent_conflicted_copy from .common import UPLOAD_CHUNK_SIZE, InvalidProject, ClientError @@ -314,23 +315,13 @@ def inspect_files(self): ) return files_meta - def compare_file_sets(self, origin, current): + def compare_file_sets(self, origin, current) -> UploadChanges: """ - Helper function to calculate difference between two sets of files metadata using file names and checksums. + Calculate difference between two sets of file metadata using file names and checksums. 
- :Example: - - >>> origin = [{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}] - >>> current = [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}] - >>> self.compare_file_sets(origin, current) - {"added": [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}], "removed": [[{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]], "renamed": [], "updated": []} - - :param origin: origin set of files metadata - :type origin: list[dict] - :param current: current set of files metadata to be compared against origin - :type current: list[dict] - :returns: changes between two sets with change type - :rtype: dict[str, list[dict]]' + :param origin: List of original file metadata + :param current: List of current file metadata + :return: UploadChanges instance with added, updated, removed """ origin_map = {f["path"]: f for f in origin} current_map = {f["path"]: f for f in current} @@ -347,7 +338,12 @@ def compare_file_sets(self, origin, current): f["origin_checksum"] = origin_map[path]["checksum"] updated.append(f) - return {"renamed": [], "added": added, "removed": removed, "updated": updated} + return UploadChanges( + added=added, + updated=updated, + removed=removed, + ) + def get_pull_changes(self, server_files): """ @@ -405,7 +401,7 @@ def get_pull_changes(self, server_files): changes["updated"] = [f for f in changes["updated"] if f not in not_updated] return changes - def get_push_changes(self): + def get_push_changes(self) -> UploadChanges: """ Calculate changes needed to be pushed to server. 
@@ -427,9 +423,9 @@ def get_push_changes(self): file["checksum"] = checksum file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(file["size"] / UPLOAD_CHUNK_SIZE))] - # need to check for for real changes in geodiff files using geodiff tool (comparing checksum is not enough) + # need to check for real changes in geodiff files using geodiff tool (comparing checksum is not enough) not_updated = [] - for file in changes["updated"]: + for file in changes.updated: path = file["path"] if not self.is_versioned_file(path): continue @@ -463,7 +459,7 @@ def get_push_changes(self): # we will need to do full upload of the file pass - changes["updated"] = [f for f in changes["updated"] if f not in not_updated] + changes.updated = [f for f in changes.updated if f not in not_updated] return changes def copy_versioned_file_for_upload(self, f, tmp_dir): From ff3afac6a7496cd48535473c712708ca4bd0b231 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 23 Jun 2025 15:00:00 +0200 Subject: [PATCH 03/31] Introduce UploadChangesHandler --- mergin/cli.py | 7 +- mergin/client_push.py | 262 ++++++++++++++++++++++-------------------- mergin/editor.py | 4 +- 3 files changed, 142 insertions(+), 131 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index b30e6c2..d8b1b3a 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -412,8 +412,8 @@ def push(ctx): return directory = os.getcwd() try: - blocking_job, non_blocking_job = push_project_async(mc, directory) - for job in [blocking_job, non_blocking_job]: + jobs = push_project_async(mc, directory) + for job in jobs: if job is not None: # if job is none, we don't upload any files, and the transaction is finished already with click.progressbar(length=job.total_size) as bar: last_transferred_size = 0 @@ -431,7 +431,8 @@ def push(ctx): return except KeyboardInterrupt: click.secho("Cancelling...") - push_project_cancel(job) + for job in jobs: + push_project_cancel(job) except Exception as e: _print_unhandled_exception() diff --git 
a/mergin/client_push.py b/mergin/client_push.py index 7e69202..74dd6fe 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -15,6 +15,7 @@ import tempfile import concurrent.futures import os +from typing import Dict, List from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject @@ -83,40 +84,63 @@ def upload_blocking(self, mc, mp): raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id)) -class UploadChanges: - def __init__(self, added=None, updated=None, removed=None): - self.added = added or [] - self.updated = updated or [] - self.removed = removed or [] - self.renamed = [] - - def is_empty(self): - return not (self.added or self.updated or self.removed or self.renamed) - - def split(self): - blocking = UploadChanges() - non_blocking = UploadChanges() - - for file in self.added: - target = blocking if is_qgis_file(file["path"]) or is_versioned_file(file["path"]) else non_blocking - target.added.append(file) +class UploadChangesHandler: + """ + Handles preparation of file changes to be uploaded to the server. - for file in self.updated: - blocking.updated.append(file) + This class is responsible for: + - Filtering project file changes. + - Splitting changes into blocking and non-blocking groups. + - TODO: Applying limits such as max file count or size to break large uploads into smaller batches. + - Generating upload-ready change groups for asynchronous job creation. 
+ """ - for file in self.removed: - blocking.removed.append(file) + def __init__(self, mp, client, project_info): + self.mp = mp + self.client = client + self.project_info = project_info + self._raw_changes = mp.get_push_changes() + self._filtered_changes = filter_changes(client, project_info, self._raw_changes) + + @staticmethod + def is_blocking_file(file): + return is_qgis_file(file["path"]) or is_versioned_file(file["path"]) + + def split_by_type(self) -> List[Dict[str, List[dict]]]: + """ + Split raw filtered changes into two batches: + 1. Blocking: updated/removed and added files that are blocking + 2. Non-blocking: added files that are not blocking + + Returns a list of dicts each with keys: + - added, updated, removed, blocking + """ + blocking_group = {"added": [], "updated": [], "removed": [], "blocking": True} + non_blocking_group = {"added": [], "updated": [], "removed": [], "blocking": False} + + for f in self._filtered_changes.get("added", []): + if self.is_blocking_file(f): + blocking_group["added"].append(f) + else: + non_blocking_group["added"].append(f) + + for f in self._filtered_changes.get("updated", []): + blocking_group["updated"].append(f) + + for f in self._filtered_changes.get("removed", []): + blocking_group["removed"].append(f) + + result = [] + if any(blocking_group[k] for k in ("added", "updated", "removed")): + result.append(blocking_group) + if any(non_blocking_group["added"]): + result.append(non_blocking_group) - result = {} - if not blocking.is_empty(): - result["blocking"] = blocking - if not non_blocking.is_empty(): - result["non_blocking"] = non_blocking return result -def push_project_async(mc, directory): - """Starts push of a project and returns pending upload job""" +def push_project_async(mc, directory) -> List[UploadJob]: + """Starts push of a project and returns pending upload jobs""" mp = MerginProject(directory) if mp.has_unfinished_pull(): @@ -153,111 +177,97 @@ def push_project_async(mc, directory): + f"\n\nLocal 
version: {local_version}\nServer version: {server_version}" ) - changes = mp.get_push_changes() - changes = filter_changes(mc, project_info, changes) + changes_handler = UploadChangesHandler(mp, mc, project_info) + changes_groups = changes_handler.split_by_type() - blocking_changes, non_blocking_changes = changes.split() + jobs = [] + for changes in changes_groups: + mp.log.debug("push changes:\n" + pprint.pformat(changes)) - blocking_job = ( - _prepare_upload_job(mp, mc, project_path, local_version, blocking_changes) - if any(len(v) for v in blocking_changes.values()) - else None - ) - non_blocking_job = ( - _prepare_upload_job(mp, mc, project_path, local_version, non_blocking_changes) - if any(len(v) for v in non_blocking_changes.values()) - else None - ) + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - return blocking_job, non_blocking_job + # If there are any versioned files (aka .gpkg) that are not updated through a diff, + # we need to make a temporary copy somewhere to be sure that we are uploading full content. + # That's because if there are pending transactions, checkpointing or switching from WAL mode + # won't work, and we would end up with some changes left in -wal file which do not get + # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. + for f in changes["updated"]: + if mp.is_versioned_file(f["path"]) and "diff" not in f: + mp.copy_versioned_file_for_upload(f, tmp_dir.name) + for f in changes["added"]: + if mp.is_versioned_file(f["path"]): + mp.copy_versioned_file_for_upload(f, tmp_dir.name) -def _prepare_upload_job(mp, mc, project_path, local_version, changes): - mp.log.debug("push changes:\n" + pprint.pformat(changes)) - - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - - # If there are any versioned files (aka .gpkg) that are not updated through a diff, - # we need to make a temporary copy somewhere to be sure that we are uploading full content. 
- # That's because if there are pending transactions, checkpointing or switching from WAL mode - # won't work, and we would end up with some changes left in -wal file which do not get - # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. - for f in changes["updated"]: - if mp.is_versioned_file(f["path"]) and "diff" not in f: - mp.copy_versioned_file_for_upload(f, tmp_dir.name) - - for f in changes["added"]: - if mp.is_versioned_file(f["path"]): - mp.copy_versioned_file_for_upload(f, tmp_dir.name) - - if not sum(len(v) for v in changes.values()): - mp.log.info(f"--- push {project_path} - nothing to do") - return + if not sum(len(v) for v in changes.values()): + mp.log.info(f"--- push {project_path} - nothing to do") + return - # drop internal info from being sent to server - for item in changes["updated"]: - item.pop("origin_checksum", None) - data = {"version": local_version, "changes": changes} + # drop internal info from being sent to server + for item in changes["updated"]: + item.pop("origin_checksum", None) + data = {"version": local_version, "changes": changes} - try: - resp = mc.post( - f"/v1/project/push/{project_path}", - data, - {"Content-Type": "application/json"}, - ) - except ClientError as err: - mp.log.error("Error starting transaction: " + str(err)) - mp.log.info("--- push aborted") - raise - server_resp = json.load(resp) - - upload_files = data["changes"]["added"] + data["changes"]["updated"] - - transaction_id = server_resp["transaction"] if upload_files else None - job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) - - if not upload_files: - mp.log.info("not uploading any files") - job.server_resp = server_resp - push_project_finalize(job) - return None # all done - no pending job - - mp.log.info(f"got transaction ID {transaction_id}") - - upload_queue_items = [] - total_size = 0 - # prepare file chunks for upload - for file in upload_files: - if "diff" in file: - # versioned file - 
uploading diff - file_location = mp.fpath_meta(file["diff"]["path"]) - file_size = file["diff"]["size"] - elif "upload_file" in file: - # versioned file - uploading full (a temporary copy) - file_location = file["upload_file"] - file_size = file["size"] - else: - # non-versioned file - file_location = mp.fpath(file["path"]) - file_size = file["size"] - - for chunk_index, chunk_id in enumerate(file["chunks"]): - size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) - upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) - - total_size += file_size - - job.total_size = total_size - job.upload_queue_items = upload_queue_items - - mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") - - # start uploads in background - job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - for item in upload_queue_items: - future = job.executor.submit(_do_upload, item, job) - job.futures.append(future) - return job + try: + resp = mc.post( + f"/v1/project/push/{project_path}", + data, + {"Content-Type": "application/json"}, + ) + except ClientError as err: + mp.log.error("Error starting transaction: " + str(err)) + mp.log.info("--- push aborted") + raise + server_resp = json.load(resp) + + upload_files = data["changes"]["added"] + data["changes"]["updated"] + + transaction_id = server_resp["transaction"] if upload_files else None + job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) + + if not upload_files: + mp.log.info("not uploading any files") + job.server_resp = server_resp + push_project_finalize(job) + return None # all done - no pending job + + mp.log.info(f"got transaction ID {transaction_id}") + + upload_queue_items = [] + total_size = 0 + # prepare file chunks for upload + for file in upload_files: + if "diff" in file: + # versioned file - uploading diff + file_location = mp.fpath_meta(file["diff"]["path"]) + file_size = 
file["diff"]["size"] + elif "upload_file" in file: + # versioned file - uploading full (a temporary copy) + file_location = file["upload_file"] + file_size = file["size"] + else: + # non-versioned file + file_location = mp.fpath(file["path"]) + file_size = file["size"] + + for chunk_index, chunk_id in enumerate(file["chunks"]): + size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) + upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) + + total_size += file_size + + job.total_size = total_size + job.upload_queue_items = upload_queue_items + + mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") + + # start uploads in background + job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) + for item in upload_queue_items: + future = job.executor.submit(_do_upload, item, job) + job.futures.append(future) + jobs.append(job) + return jobs def push_project_wait(job): diff --git a/mergin/editor.py b/mergin/editor.py index 3797f48..5a55a0e 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -25,7 +25,7 @@ def is_editor_enabled(mc, project_info: dict) -> bool: return server_support and project_role == EDITOR_ROLE_NAME -def _apply_editor_filters(changes: UploadChanges) -> UploadChanges: +def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: """ Applies editor-specific filters to the changes dictionary, removing any changes to files that are not in the editor's list of allowed files. @@ -41,7 +41,7 @@ def _apply_editor_filters(changes: UploadChanges) -> UploadChanges: return changes -def filter_changes(mc, project_info: dict, changes: UploadChanges) -> UploadChanges: +def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: """ Filters the given changes dictionary based on the editor's enabled state. 
From 70816878f6f5e33e5281395b04e88057e8c09bcb Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 23 Jun 2025 15:42:44 +0200 Subject: [PATCH 04/31] Put changes dicts back --- mergin/client_push.py | 12 ++++++------ mergin/editor.py | 5 ++--- mergin/merginproject.py | 36 +++++++++++++++++++----------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index 74dd6fe..f0b12c3 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -15,7 +15,7 @@ import tempfile import concurrent.futures import os -from typing import Dict, List +from typing import Dict, List, Optional from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject @@ -139,7 +139,7 @@ def split_by_type(self) -> List[Dict[str, List[dict]]]: return result -def push_project_async(mc, directory) -> List[UploadJob]: +def push_project_async(mc, directory) -> Optional[List[UploadJob]]: """Starts push of a project and returns pending upload jobs""" mp = MerginProject(directory) @@ -164,8 +164,8 @@ def push_project_async(mc, directory) -> List[UploadJob]: username = mc.username() # permissions field contains information about update, delete and upload privileges of the user - # on a specific project. This is more accurate information then "writernames" field, as it takes - # into account namespace privileges. So we have to check only "permissions", namely "upload" one + # on a specific project. This is more accurate information than "writernames" field, as it takes + # into account namespace privileges. 
So we have to check only "permissions", namely "upload" once if not mc.has_writing_permissions(project_path): mp.log.error(f"--- push {project_path} - username {username} does not have write access") raise ClientError(f"You do not seem to have write access to the project (username '{username}')") @@ -180,12 +180,12 @@ def push_project_async(mc, directory) -> List[UploadJob]: changes_handler = UploadChangesHandler(mp, mc, project_info) changes_groups = changes_handler.split_by_type() + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") jobs = [] + for changes in changes_groups: mp.log.debug("push changes:\n" + pprint.pformat(changes)) - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - # If there are any versioned files (aka .gpkg) that are not updated through a diff, # we need to make a temporary copy somewhere to be sure that we are uploading full content. # That's because if there are pending transactions, checkpointing or switching from WAL mode diff --git a/mergin/editor.py b/mergin/editor.py index 5a55a0e..610502b 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -1,8 +1,7 @@ from itertools import filterfalse from typing import Callable, Dict, List -from .client_push import UploadChanges -from .utils import is_mergin_config, is_qgis_file, is_versioned_file +from .utils import is_qgis_file EDITOR_ROLE_NAME = "editor" @@ -37,7 +36,7 @@ def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict """ updated = changes.get("updated", []) - changes.updated = list(filterfalse(_disallowed_changes, updated)) + changes["updated"] = list(filterfalse(_disallowed_changes, updated)) return changes diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 209e72e..82b5092 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -7,9 +7,9 @@ import uuid import tempfile from datetime import datetime +from typing import List, Dict from dateutil.tz import tzlocal -from .client_push import 
UploadChanges from .editor import prevent_conflicted_copy from .common import UPLOAD_CHUNK_SIZE, InvalidProject, ClientError @@ -315,13 +315,20 @@ def inspect_files(self): ) return files_meta - def compare_file_sets(self, origin, current) -> UploadChanges: - """ - Calculate difference between two sets of file metadata using file names and checksums. - - :param origin: List of original file metadata - :param current: List of current file metadata - :return: UploadChanges instance with added, updated, removed + def compare_file_sets(self, origin, current) -> Dict[str, List[dict]]: + """ + Calculate difference between two sets of files metadata using file names and checksums. + :Example: + >>> origin = [{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}] + >>> current = [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}] + >>> self.compare_file_sets(origin, current) + {"added": [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}], "removed": [[{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]], "renamed": [], "updated": []} + :param origin: origin set of files metadata + :type origin: list[dict] + :param current: current set of files metadata to be compared against origin + :type current: list[dict] + :returns: changes between two sets with change type + :rtype: dict[str, list[dict]]' """ origin_map = {f["path"]: f for f in origin} current_map = {f["path"]: f for f in current} @@ -338,12 +345,7 @@ def compare_file_sets(self, origin, current) -> UploadChanges: f["origin_checksum"] = origin_map[path]["checksum"] updated.append(f) - return UploadChanges( - added=added, - updated=updated, - removed=removed, - ) - + return 
{"renamed": [], "added": added, "removed": removed, "updated": updated} def get_pull_changes(self, server_files): """ @@ -401,7 +403,7 @@ def get_pull_changes(self, server_files): changes["updated"] = [f for f in changes["updated"] if f not in not_updated] return changes - def get_push_changes(self) -> UploadChanges: + def get_push_changes(self) -> Dict[str, List[dict]]: """ Calculate changes needed to be pushed to server. @@ -425,7 +427,7 @@ def get_push_changes(self) -> UploadChanges: # need to check for real changes in geodiff files using geodiff tool (comparing checksum is not enough) not_updated = [] - for file in changes.updated: + for file in changes["updated"]: path = file["path"] if not self.is_versioned_file(path): continue @@ -459,7 +461,7 @@ def get_push_changes(self) -> UploadChanges: # we will need to do full upload of the file pass - changes.updated = [f for f in changes.updated if f not in not_updated] + changes["updated"] = [f for f in changes["updated"] if f not in not_updated] return changes def copy_versioned_file_for_upload(self, f, tmp_dir): From 87f072e5afe45cbb0a621c36e15dbe93487d6c48 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 23 Jun 2025 15:46:18 +0200 Subject: [PATCH 05/31] Fix legacy changes --- mergin/client.py | 4 ++-- mergin/test/test_client.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 4be45d9..0925ea5 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -876,8 +876,8 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - blocking_job, non_blocking_job = push_project_async(self, directory) - for job in [blocking_job, non_blocking_job]: + jobs = push_project_async(self, directory) + for job in jobs: if job is None: return # there is nothing to push (or we only deleted some files) push_project_wait(job) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 40790d6..52f43d7 100644 
--- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -393,8 +393,9 @@ def test_cancel_push(mc): assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) # start pushing and then cancel the job - job, _ = push_project_async(mc, project_dir) - push_project_cancel(job) + jobs = push_project_async(mc, project_dir) + for job in jobs: + push_project_cancel(job) # if cancelled properly, we should be now able to do the push without any problem mc.push_project(project_dir) From ed47d585ac82a3dc375901acdf967c47641660aa Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Tue, 24 Jun 2025 14:05:27 +0200 Subject: [PATCH 06/31] Make ChnagesHandler cleaner --- mergin/client_push.py | 73 +++++++++++++++++++++++++------------- mergin/editor.py | 17 --------- mergin/test/test_client.py | 7 ++-- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index f0b12c3..1636933 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -19,7 +19,7 @@ from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject -from .editor import filter_changes +from .editor import is_editor_enabled, _apply_editor_filters from .utils import is_qgis_file, is_versioned_file @@ -84,7 +84,7 @@ def upload_blocking(self, mc, mp): raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id)) -class UploadChangesHandler: +class ChangesHandler: """ Handles preparation of file changes to be uploaded to the server. @@ -95,49 +95,71 @@ class UploadChangesHandler: - Generating upload-ready change groups for asynchronous job creation. 
""" - def __init__(self, mp, client, project_info): - self.mp = mp + def __init__(self, client, project_info, changes): self.client = client self.project_info = project_info - self._raw_changes = mp.get_push_changes() - self._filtered_changes = filter_changes(client, project_info, self._raw_changes) + self._raw_changes = changes @staticmethod def is_blocking_file(file): return is_qgis_file(file["path"]) or is_versioned_file(file["path"]) - def split_by_type(self) -> List[Dict[str, List[dict]]]: + def _filter_changes(self, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: + """ + Filters the given changes dictionary based on the editor's enabled state. + + If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters. + + Args: + changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries. + + Returns: + dict[str, list[dict]]: The filtered changes dictionary. + """ + if not is_editor_enabled(self.client, self.project_info): + return changes + return _apply_editor_filters(changes) + + def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[dict]]]: """ Split raw filtered changes into two batches: 1. Blocking: updated/removed and added files that are blocking 2. 
Non-blocking: added files that are not blocking - Returns a list of dicts each with keys: - - added, updated, removed, blocking + Adds blocking key with a boolean value for each group """ - blocking_group = {"added": [], "updated": [], "removed": [], "blocking": True} - non_blocking_group = {"added": [], "updated": [], "removed": [], "blocking": False} + blocking_changes = {"added": [], "updated": [], "removed": [], "blocking": True} + non_blocking_changes = {"added": [], "updated": [], "removed": [], "blocking": False} - for f in self._filtered_changes.get("added", []): + for f in changes.get("added", []): if self.is_blocking_file(f): - blocking_group["added"].append(f) + blocking_changes["added"].append(f) else: - non_blocking_group["added"].append(f) + non_blocking_changes["added"].append(f) - for f in self._filtered_changes.get("updated", []): - blocking_group["updated"].append(f) + for f in changes.get("updated", []): + blocking_changes["updated"].append(f) - for f in self._filtered_changes.get("removed", []): - blocking_group["removed"].append(f) + for f in changes.get("removed", []): + blocking_changes["removed"].append(f) result = [] - if any(blocking_group[k] for k in ("added", "updated", "removed")): - result.append(blocking_group) - if any(non_blocking_group["added"]): - result.append(non_blocking_group) + if any(blocking_changes[k] for k in ("added", "updated", "removed")): + result.append(blocking_changes) + if any(non_blocking_changes["added"]): + result.append(non_blocking_changes) return result + def split(self) -> List[Dict[str, List[dict]]]: + """ + Applies all configured internal filters and returns a list of change ready to be uploaded. 
+ """ + changes = self._filter_changes(self._raw_changes) + changes = self._split_by_type(changes) + # TODO: apply limits; changes = self._limit_by_file_count(changes) + return changes + def push_project_async(mc, directory) -> Optional[List[UploadJob]]: """Starts push of a project and returns pending upload jobs""" @@ -177,13 +199,14 @@ def push_project_async(mc, directory) -> Optional[List[UploadJob]]: + f"\n\nLocal version: {local_version}\nServer version: {server_version}" ) - changes_handler = UploadChangesHandler(mp, mc, project_info) - changes_groups = changes_handler.split_by_type() + changes = mp.get_push_changes() + changes_handler = ChangesHandler(mc, project_info, changes) + changes_list = changes_handler.split() tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") jobs = [] - for changes in changes_groups: + for changes in changes_list: mp.log.debug("push changes:\n" + pprint.pformat(changes)) # If there are any versioned files (aka .gpkg) that are not updated through a diff, diff --git a/mergin/editor.py b/mergin/editor.py index 610502b..aa6420c 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -40,23 +40,6 @@ def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict return changes -def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: - """ - Filters the given changes dictionary based on the editor's enabled state. - - If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters. - - Args: - changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries. - - Returns: - dict[str, list[dict]]: The filtered changes dictionary. 
- """ - if not is_editor_enabled(mc, project_info): - return changes - return _apply_editor_filters(changes) - - def prevent_conflicted_copy(path: str, mc, project_info: dict) -> bool: """ Decides whether a file path should be blocked from creating a conflicted copy. diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 52f43d7..049a0ed 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -21,7 +21,7 @@ TokenError, ServerType, ) -from ..client_push import push_project_async, push_project_cancel +from ..client_push import push_project_async, push_project_cancel, ChangesHandler from ..client_pull import ( download_project_async, download_project_wait, @@ -38,7 +38,7 @@ ) from ..merginproject import pygeodiff from ..report import create_report -from ..editor import EDITOR_ROLE_NAME, filter_changes, is_editor_enabled +from ..editor import EDITOR_ROLE_NAME, is_editor_enabled from ..common import ErrorCode, WorkspaceRole, ProjectRole SERVER_URL = os.environ.get("TEST_MERGIN_URL") @@ -2593,7 +2593,8 @@ def test_editor(mc: MerginClient): "updated": [{"path": "/folder/project.updated.Qgs"}], "removed": [{"path": "/folder/project.removed.qgs"}], } - qgs_changeset = filter_changes(mc, project_info, qgs_changeset) + changes_handler = ChangesHandler(mc, project_info, qgs_changeset) + qgs_changeset = changes_handler._filter_changes(qgs_changeset) assert sum(len(v) for v in qgs_changeset.values()) == 2 From aab4c7dbd212846216e52020cb44e5c394560eeb Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Wed, 25 Jun 2025 13:37:50 +0200 Subject: [PATCH 07/31] Remove blocking flag --- mergin/client_push.py | 10 ++++------ mergin/common.py | 2 +- mergin/merginproject.py | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index 1636933..4474060 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -125,11 +125,9 @@ def _split_by_type(self, changes: Dict[str, List[dict]]) 
-> List[Dict[str, List[ Split raw filtered changes into two batches: 1. Blocking: updated/removed and added files that are blocking 2. Non-blocking: added files that are not blocking - - Adds blocking key with a boolean value for each group """ - blocking_changes = {"added": [], "updated": [], "removed": [], "blocking": True} - non_blocking_changes = {"added": [], "updated": [], "removed": [], "blocking": False} + blocking_changes = {"added": [], "updated": [], "removed": []} + non_blocking_changes = {"added": [], "updated": [], "removed": []} for f in changes.get("added", []): if self.is_blocking_file(f): @@ -144,9 +142,9 @@ def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[ blocking_changes["removed"].append(f) result = [] - if any(blocking_changes[k] for k in ("added", "updated", "removed")): + if any(len(v) for v in blocking_changes.values()): result.append(blocking_changes) - if any(non_blocking_changes["added"]): + if any(len(v) for v in non_blocking_changes.values()): result.append(non_blocking_changes) return result diff --git a/mergin/common.py b/mergin/common.py index 9e65a6c..c681841 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -15,7 +15,7 @@ this_dir = os.path.dirname(os.path.realpath(__file__)) -# Error code from the public API, add to the end of enum as we handle more eror +# Error code from the public API, add to the end of enum as we handle more error class ErrorCode(Enum): ProjectsLimitHit = "ProjectsLimitHit" StorageLimitHit = "StorageLimitHit" diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 82b5092..5e46ac7 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -686,7 +686,7 @@ def apply_push_changes(self, changes): """ For geodiff files update basefiles according to changes pushed to server. 
- :param changes: metadata for pulled files + :param changes: metadata for pushed files :type changes: dict[str, list[dict]] """ for k, v in changes.items(): From a57709234b6afffa0da3380c5ed0a7fb14165846 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Wed, 25 Jun 2025 13:41:30 +0200 Subject: [PATCH 08/31] Replace sum() with faster any() in boolean expressions --- mergin/client_push.py | 2 +- mergin/test/test_client.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index 4474060..99e4d1a 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -220,7 +220,7 @@ def push_project_async(mc, directory) -> Optional[List[UploadJob]]: if mp.is_versioned_file(f["path"]): mp.copy_versioned_file_for_upload(f, tmp_dir.name) - if not sum(len(v) for v in changes.values()): + if not any(len(v) for v in changes.values()): mp.log.info(f"--- push {project_path} - nothing to do") return diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 049a0ed..dfdd7ee 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -304,7 +304,7 @@ def test_push_pull_changes(mc): # check changes before applied pull_changes, push_changes, _ = mc.project_status(project_dir) - assert not sum(len(v) for v in pull_changes.values()) + assert not any(len(v) for v in pull_changes.values()) assert next((f for f in push_changes["added"] if f["path"] == f_added), None) assert next((f for f in push_changes["removed"] if f["path"] == f_removed), None) assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) @@ -388,7 +388,7 @@ def test_cancel_push(mc): # check changes before applied pull_changes, push_changes, _ = mc.project_status(project_dir) - assert not sum(len(v) for v in pull_changes.values()) + assert not any(len(v) for v in pull_changes.values()) assert next((f for f in push_changes["added"] if f["path"] == f_added), None) assert next((f for f in 
push_changes["updated"] if f["path"] == f_updated), None) @@ -2631,8 +2631,8 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient): assert any(file["path"] == txt_file_name for file in project_info.get("files")) is True assert any(file["path"] == gpkg_file_name for file in project_info.get("files")) is True pull_changes, push_changes, push_changes_summary = mc.project_status(project_dir) - assert not sum(len(v) for v in pull_changes.values()) - assert not sum(len(v) for v in push_changes.values()) + assert not any(len(v) for v in pull_changes.values()) + assert not any(len(v) for v in push_changes.values()) # editor is trying to push row to gpkg file -> it's possible shutil.copy( From 4500d0bf1e2053edb972336c78f5c1907b5515a5 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Thu, 26 Jun 2025 13:22:45 +0200 Subject: [PATCH 09/31] Fix tests and codestyle --- mergin/client.py | 9 ++- mergin/client_push.py | 1 - mergin/test/test_client.py | 153 +++++++++++++++++++------------------ scripts/update_version.py | 6 +- setup.py | 44 +++++------ 5 files changed, 107 insertions(+), 106 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 0925ea5..d30443a 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -81,7 +81,7 @@ class MerginClient: """ def __init__(self, url=None, auth_token=None, login=None, password=None, plugin_version=None, proxy_config=None): - self.url = url if url is not None else MerginClient.default_url() + self.url = (url if url is not None else MerginClient.default_url()).rstrip("/") + "/" self._auth_params = None self._auth_session = None self._user_info = None @@ -453,7 +453,8 @@ def create_project(self, project_name, is_public=False, namespace=None): def create_project_and_push(self, project_name, directory, is_public=False, namespace=None): """ - Convenience method to create project and push the initial version right after that. + Convenience method to create project and push the the files right after that. 
+ Creates two versions when directory contains blocking and non-blocking changes. :param project_name: Project's full name (/) :type project_name: String @@ -877,9 +878,9 @@ def push_project(self, directory): :type directory: String """ jobs = push_project_async(self, directory) + if not jobs: + return # there is nothing to push (or we only deleted some files) for job in jobs: - if job is None: - return # there is nothing to push (or we only deleted some files) push_project_wait(job) push_project_finalize(job) diff --git a/mergin/client_push.py b/mergin/client_push.py index 99e4d1a..b506b2f 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -415,4 +415,3 @@ def remove_diff_files(job) -> None: diff_file = job.mp.fpath_meta(change["diff"]["path"]) if os.path.exists(diff_file): os.remove(diff_file) - diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index dfdd7ee..88c7c31 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -85,7 +85,7 @@ def mcStorage(request): client_workspace_storage = client_workspace["storage"] def teardown(): - # back to original values... (1 project, api allowed ...) + # back to original values... (2 projects, api allowed ...) 
client.patch( f"/v1/tests/workspaces/{client_workspace_id}", {"limits_override": get_limit_overrides(client_workspace_storage)}, @@ -233,18 +233,18 @@ def test_create_remote_project_from_local(mc): assert source_mp.project_full_name() == f"{API_USER}/{test_project}" assert source_mp.project_name() == test_project assert source_mp.workspace_name() == API_USER - assert source_mp.version() == "v1" + assert source_mp.version() == "v2" # data was split to two pushes - blocking and non-blocking # check basic metadata about created project project_info = mc.project_info(project) - assert project_info["version"] == "v1" + assert project_info["version"] == "v2" assert project_info["name"] == test_project assert project_info["namespace"] == API_USER assert project_info["id"] == source_mp.project_id() # check project metadata retrieval by id project_info = mc.project_info(source_mp.project_id()) - assert project_info["version"] == "v1" + assert project_info["version"] == "v2" assert project_info["name"] == test_project assert project_info["namespace"] == API_USER assert project_info["id"] == source_mp.project_id() @@ -285,7 +285,7 @@ def test_push_pull_changes(mc): assert mp2.project_full_name() == f"{API_USER}/{test_project}" assert mp2.project_name() == test_project assert mp2.workspace_name() == API_USER - assert mp2.version() == "v1" + assert mp2.version() == "v2" # two pushes - blocking and non-blocking # test push changes (add, remove, rename, update) f_added = "new.txt" @@ -317,10 +317,10 @@ def test_push_pull_changes(mc): mp = MerginProject(project_dir) assert mp.project_full_name() == f"{API_USER}/{test_project}" - assert mp.version() == "v2" + assert mp.version() == "v4" project_info = mc.project_info(project) - assert project_info["version"] == "v2" + assert project_info["version"] == "v4" assert not next((f for f in project_info["files"] if f["path"] == f_removed), None) assert not next((f for f in project_info["files"] if f["path"] == f_renamed), None) assert 
next((f for f in project_info["files"] if f["path"] == "renamed.txt"), None) @@ -329,7 +329,7 @@ def test_push_pull_changes(mc): assert generate_checksum(os.path.join(project_dir, f_updated)) == f_remote_checksum assert project_info["id"] == mp.project_id() assert len(project_info["files"]) == len(mp.inspect_files()) - project_version = mc.project_version_info(project_info["id"], "v2") + project_version = mc.project_version_info(project_info["id"], "v3") updated_file = [f for f in project_version["changes"]["updated"] if f["path"] == f_updated][0] assert "origin_checksum" not in updated_file # internal client info @@ -355,9 +355,9 @@ def test_push_pull_changes(mc): assert not os.path.exists(os.path.join(project_dir_2, f_removed)) assert not os.path.exists(os.path.join(project_dir_2, f_renamed)) assert os.path.exists(os.path.join(project_dir_2, "renamed.txt")) - assert os.path.exists(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1))) + assert os.path.exists(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 2))) assert ( - generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1))) + generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 2))) == f_conflict_checksum ) assert generate_checksum(os.path.join(project_dir_2, f_updated)) == f_remote_checksum @@ -403,7 +403,7 @@ def test_cancel_push(mc): # download the project to a different directory and check the version and content mc.download_project(project, project_dir_2) mp = MerginProject(project_dir_2) - assert mp.version() == "v2" + assert mp.version() == "v4" # 2 pushes when created + 2 when modified with add and update changes assert os.path.exists(os.path.join(project_dir_2, f_added)) with open(os.path.join(project_dir_2, f_updated), "r") as f: assert f.read() == modification @@ -465,7 +465,7 @@ def test_sync_diff(mc): # check project after push project_info = mc.project_info(project) 
- assert project_info["version"] == "v3" + assert project_info["version"] == "v4" # 2 pushes when project was created assert project_info["id"] == mp.project_id() f_remote = next((f for f in project_info["files"] if f["path"] == f_updated), None) assert next((f for f in project_info["files"] if f["path"] == "renamed.gpkg"), None) @@ -563,7 +563,9 @@ def test_force_gpkg_update(mc): # check project after push project_info = mc.project_info(project) - assert project_info["version"] == "v2" + assert ( + project_info["version"] == "v3" + ) # two pushes while creating the project with blocking and non-blocking change f_remote = next((f for f in project_info["files"] if f["path"] == f_updated), None) assert "diff" not in f_remote @@ -961,19 +963,19 @@ def test_download_versions(mc): mc.push_project(project_dir) project_info = mc.project_info(project) - assert project_info["version"] == "v2" + assert project_info["version"] == "v3" # 2 versions created with initial push of blocking and non-blocking files - mc.download_project(project, project_dir_v1, "v1") + mc.download_project(project, project_dir_v1, "v2") assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) - assert not os.path.exists(os.path.join(project_dir_v2, f_added)) # added only in v2 + assert not os.path.exists(os.path.join(project_dir_v2, f_added)) # added only in v3 - mc.download_project(project, project_dir_v2, "v2") + mc.download_project(project, project_dir_v2, "v3") assert os.path.exists(os.path.join(project_dir_v2, f_added)) - assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) # added in v1 but still present in v2 + assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) # added in v1 but still present in v3 # try to download not-existing version with pytest.raises(ClientError): - mc.download_project(project, project_dir_v3, "v3") + mc.download_project(project, project_dir_v3, "v4") def test_paginated_project_list(mc): @@ -1067,11 +1069,13 @@ def 
create_versioned_project(mc, project_name, project_dir, updated_file, remove # create remote project shutil.copytree(TEST_DATA_DIR, project_dir) - mc.create_project_and_push(project, project_dir) + mc.create_project_and_push( + project, project_dir + ) # creates two versions because push was split to blocking and non-blocking mp = MerginProject(project_dir) - # create versions 2-4 + # create versions 3-6 changes = ( "inserted_1_A.gpkg", "inserted_1_A_mod.gpkg", @@ -1110,7 +1114,7 @@ def test_get_versions_with_file_changes(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False) project_info = mc.project_info(project) - assert project_info["version"] == "v4" + assert project_info["version"] == "v5" assert project_info["id"] == mp.project_id() file_history = mc.project_file_history_info(project, f_updated) @@ -1120,21 +1124,21 @@ def test_get_versions_with_file_changes(mc): project, f_updated, version_from="v1", - version_to="v5", + version_to="v6", file_history=file_history, ) - assert "Wrong version parameters: 1-5" in str(e.value) - assert "Available versions: [1, 2, 3, 4]" in str(e.value) + assert "Wrong version parameters: 1-6" in str(e.value) + assert "Available versions: [1, 3, 4, 5]" in str(e.value) mod_versions = get_versions_with_file_changes( mc, project, f_updated, - version_from="v2", - version_to="v4", + version_from="v3", + version_to="v5", file_history=file_history, ) - assert mod_versions == [f"v{i}" for i in range(2, 5)] + assert mod_versions == [f"v{i}" for i in range(3, 6)] def check_gpkg_same_content(mergin_project, gpkg_path_1, gpkg_path_2): @@ -1155,7 +1159,7 @@ def test_download_file(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated) project_info = mc.project_info(project) - assert project_info["version"] == "v5" + assert project_info["version"] == "v6" assert project_info["id"] == mp.project_id() # Versioned file should have the following content at versions 2-4 @@ -1167,14 +1171,14 
@@ def test_download_file(mc): # Download the base file at versions 2-4 and check the changes f_downloaded = os.path.join(project_dir, f_updated) - for ver in range(2, 5): + for ver in range(3, 6): mc.download_file(project_dir, f_updated, f_downloaded, version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content assert check_gpkg_same_content(mp, f_downloaded, expected) # make sure there will be exception raised if a file doesn't exist in the version - with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): - mc.download_file(project_dir, f_updated, f_downloaded, version="v5") + with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"): + mc.download_file(project_dir, f_updated, f_downloaded, version="v6") def test_download_diffs(mc): @@ -1189,24 +1193,24 @@ def test_download_diffs(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False) project_info = mc.project_info(project) - assert project_info["version"] == "v4" + assert project_info["version"] == "v5" assert project_info["id"] == mp.project_id() - # Download diffs of updated file between versions 1 and 2 - mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v2") + # Download diffs of updated file between versions 1 and 3 + mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v3") assert os.path.exists(diff_file) assert mp.geodiff.has_changes(diff_file) assert mp.geodiff.changes_count(diff_file) == 1 - changes_file = diff_file + ".changes1-2" + changes_file = diff_file + ".changes1-3" mp.geodiff.list_changes_summary(diff_file, changes_file) with open(changes_file, "r") as f: changes = json.loads(f.read())["geodiff_summary"][0] assert changes["insert"] == 1 assert changes["update"] == 0 - # Download diffs of updated file between versions 2 and 4 - 
mc.get_file_diff(project_dir, f_updated, diff_file, "v2", "v4") - changes_file = diff_file + ".changes2-4" + # Download diffs of updated file between versions 3 and 5 + mc.get_file_diff(project_dir, f_updated, diff_file, "v3", "v5") + changes_file = diff_file + ".changes3-5" mp.geodiff.list_changes_summary(diff_file, changes_file) with open(changes_file, "r") as f: changes = json.loads(f.read())["geodiff_summary"][0] @@ -1214,14 +1218,14 @@ def test_download_diffs(mc): assert changes["update"] == 1 with pytest.raises(ClientError) as e: - mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v1") + mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v1") assert "Wrong version parameters" in str(e.value) assert "version_from needs to be smaller than version_to" in str(e.value) with pytest.raises(ClientError) as e: - mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v5") + mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v6") assert "Wrong version parameters" in str(e.value) - assert "Available versions: [1, 2, 3, 4]" in str(e.value) + assert "Available versions: [1, 3, 4, 5]" in str(e.value) def test_modify_project_permissions(mc): @@ -1988,13 +1992,13 @@ def test_project_versions_list(mc): project_dir = os.path.join(TMP_DIR, test_project) create_versioned_project(mc, test_project, project_dir, "base.gpkg") project_info = mc.project_info(project) - assert project_info["version"] == "v5" + assert project_info["version"] == "v6" # get all versions versions = mc.project_versions(project) - assert len(versions) == 5 + assert len(versions) == 6 assert versions[0]["name"] == "v1" - assert versions[-1]["name"] == "v5" + assert versions[-1]["name"] == "v6" # get first 3 versions versions = mc.project_versions(project, to="v3") @@ -2003,7 +2007,7 @@ def test_project_versions_list(mc): # get last 2 versions versions = mc.project_versions(project, since="v4") - assert len(versions) == 2 + assert len(versions) == 3 assert versions[0]["name"] == 
"v4" # get range v2-v4 @@ -2013,11 +2017,11 @@ def test_project_versions_list(mc): assert versions[-1]["name"] == "v4" versions_count = mc.project_versions_count(project) - assert versions_count == 5 + assert versions_count == 6 versions, _ = mc.paginated_project_versions(project, page=1, descending=True) - assert len(versions) == 5 - assert versions[0]["name"] == "v5" + assert len(versions) == 6 + assert versions[0]["name"] == "v6" assert versions[-1]["name"] == "v1" @@ -2028,10 +2032,10 @@ def test_report(mc): f_updated = "base.gpkg" mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False, overwrite=True) - # create report for between versions 2 and 4 + # create report for between versions 3 and 5 directory = mp.dir - since = "v2" - to = "v4" + since = "v3" + to = "v5" proj_name = project.replace(os.path.sep, "-") report_file = os.path.join(TMP_DIR, "report", f"{proj_name}-{since}-{to}.csv") if os.path.exists(report_file): @@ -2058,7 +2062,7 @@ def test_report(mc): ) assert headers in content assert f"base.gpkg,simple,{API_USER}" in content - assert "v3,update,,,2" in content + assert "v4,update,,,2" in content # files not edited are not in reports assert "inserted_1_A.gpkg" not in content @@ -2067,7 +2071,7 @@ def test_report(mc): assert warnings # do report for v1 with added files and v5 with overwritten file - warnings = create_report(mc, directory, "v1", "v5", report_file) + warnings = create_report(mc, directory, "v1", "v6", report_file) assert warnings # rm local Mergin Maps project and try again @@ -2175,11 +2179,11 @@ def test_changesets_download(mc): mp = MerginProject(project_dir) os.makedirs(download_dir, exist_ok=True) - diff_file = os.path.join(download_dir, "base-v1-2.diff") - mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v2") + diff_file = os.path.join(download_dir, "base-v1-3.diff") + mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v3") assert os.path.exists(diff_file) assert 
mp.geodiff.has_changes(diff_file) - assert mp.geodiff.changes_count(diff_file) == 1 + assert mp.geodiff.changes_count(diff_file) == 3 diff_file = os.path.join(download_dir, "base-v2-3.diff") mc.get_file_diff(project_dir, test_gpkg, diff_file, "v2", "v3") @@ -2216,10 +2220,10 @@ def test_version_info(mc): shutil.copy(os.path.join(TEST_DATA_DIR, "inserted_1_A_mod.gpkg"), file_path) mc.push_project(project_dir) project_info = mc.project_info(project) - info = mc.project_version_info(project_info.get("id"), "v2") + info = mc.project_version_info(project_info.get("id"), "v3") assert info["namespace"] == API_USER assert info["project_name"] == test_project - assert info["name"] == "v2" + assert info["name"] == "v3" assert info["author"] == API_USER created = datetime.strptime(info["created"], "%Y-%m-%dT%H:%M:%SZ") assert created.date() == date.today() @@ -2345,8 +2349,8 @@ def test_reset_local_changes(mc: MerginClient): os.remove(mp.fpath("test.txt")) os.remove(mp.fpath("test_dir/test2.txt")) - # download version 2 and create MerginProject for it - mc.download_project(project, project_dir_2, version="v2") + # download version 3 and create MerginProject for it + mc.download_project(project, project_dir_2, version="v3") mp = MerginProject(project_dir_2) # make some changes @@ -2361,7 +2365,7 @@ def test_reset_local_changes(mc: MerginClient): assert len(push_changes["removed"]) == 2 assert len(push_changes["updated"]) == 0 - # reset back to original version we had - v2 + # reset back to original version we had - v3 mc.reset_local_changes(project_dir_2) # push changes after the reset - should be none @@ -2441,7 +2445,7 @@ def test_project_rename(mc: MerginClient): # validate project info project_info = mc.project_info(project_renamed) - assert project_info["version"] == "v1" + assert project_info["version"] == "v2" # two versions created in initial push assert project_info["name"] == test_project_renamed assert project_info["namespace"] == API_USER with 
pytest.raises(ClientError, match="The requested URL was not found on the server"): @@ -2482,7 +2486,7 @@ def test_download_files(mc: MerginClient): mp = create_versioned_project(mc, test_project, project_dir, f_updated) project_info = mc.project_info(project) - assert project_info["version"] == "v5" + assert project_info["version"] == "v6" assert project_info["id"] == mp.project_id() # Versioned file should have the following content at versions 2-4 @@ -2495,15 +2499,15 @@ def test_download_files(mc: MerginClient): downloaded_file = os.path.join(download_dir, f_updated) # if output_paths is specified look at that location - for ver in range(2, 5): + for ver in range(3, 6): mc.download_files(project_dir, [f_updated], [downloaded_file], version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content assert check_gpkg_same_content(mp, downloaded_file, expected) # if output_paths is not specified look in the mergin project folder - for ver in range(2, 5): + for ver in range(3, 6): mc.download_files(project_dir, [f_updated], version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content assert check_gpkg_same_content(mp, mp.fpath(f_updated), expected) # download two files from v1 and check their content @@ -2514,7 +2518,7 @@ def test_download_files(mc: MerginClient): project_dir, [f_updated, file_2], [downloaded_file, downloaded_file_2], - version="v1", + version="v2", ) assert check_gpkg_same_content(mp, downloaded_file, os.path.join(TEST_DATA_DIR, f_updated)) @@ -2526,11 +2530,11 @@ def test_download_files(mc: MerginClient): assert content_exp == content # make sure there will be exception raised if a file doesn't exist in the version - with 
pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): - mc.download_files(project_dir, [f_updated], version="v5") + with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"): + mc.download_files(project_dir, [f_updated], version="v6") - with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v3"): - mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v3") + with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v4"): + mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v4") def test_download_failure(mc): @@ -2683,6 +2687,7 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient): def test_error_push_already_named_project(mc: MerginClient): test_project = "test_push_already_existing" project_dir = os.path.join(TMP_DIR, test_project) + cleanup(mc, test_project, [project_dir]) with pytest.raises(ClientError) as e: mc.create_project_and_push(test_project, project_dir) diff --git a/scripts/update_version.py b/scripts/update_version.py index 184d6a8..2020659 100644 --- a/scripts/update_version.py +++ b/scripts/update_version.py @@ -4,7 +4,7 @@ def replace_in_file(filepath, regex, sub): - with open(filepath, 'r') as f: + with open(filepath, "r") as f: content = f.read() content_new = re.sub(regex, sub, content, flags=re.M) @@ -15,14 +15,14 @@ def replace_in_file(filepath, regex, sub): dir_path = os.path.dirname(os.path.realpath(__file__)) parser = argparse.ArgumentParser() -parser.add_argument('--version', help='version to replace') +parser.add_argument("--version", help="version to replace") args = parser.parse_args() ver = args.version print("using version " + ver) about_file = os.path.join(dir_path, os.pardir, "mergin", "version.py") print("patching " + about_file) -replace_in_file(about_file, "__version__\s=\s\".*", "__version__ = \"" + ver + "\"") +replace_in_file(about_file, '__version__\s=\s".*', 
'__version__ = "' + ver + '"') setup_file = os.path.join(dir_path, os.pardir, "setup.py") print("patching " + setup_file) diff --git a/setup.py b/setup.py index 5bc34ac..2e3a4fb 100644 --- a/setup.py +++ b/setup.py @@ -4,35 +4,31 @@ from setuptools import setup, find_packages setup( - name='mergin-client', - version='0.10.0', - url='https://github.com/MerginMaps/python-api-client', - license='MIT', - author='Lutra Consulting Ltd.', - author_email='info@merginmaps.com', - description='Mergin Maps utils and client', - long_description='Mergin Maps utils and client', - + name="mergin-client", + version="0.10.0", + url="https://github.com/MerginMaps/python-api-client", + license="MIT", + author="Lutra Consulting Ltd.", + author_email="info@merginmaps.com", + description="Mergin Maps utils and client", + long_description="Mergin Maps utils and client", packages=find_packages(), - - platforms='any', + platforms="any", install_requires=[ - 'python-dateutil==2.8.2', - 'pygeodiff==2.0.4', - 'pytz==2022.1', - 'click==8.1.3', + "python-dateutil==2.8.2", + "pygeodiff==2.0.4", + "pytz==2022.1", + "click==8.1.3", ], - entry_points={ - 'console_scripts': ['mergin=mergin.cli:cli'], + "console_scripts": ["mergin=mergin.cli:cli"], }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3' + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", ], - package_data={'mergin': ['cert.pem']} + package_data={"mergin": ["cert.pem"]}, ) From b326fba3acbfec9fe51f69656e15114c27efaa05 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 16 May 2025 14:43:09 +0200 Subject: [PATCH 10/31] login type and checks --- mergin/client.py | 41 +++++++++++++++++++++++++++++++++-------- 1 
file changed, 33 insertions(+), 8 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index d30443a..025e21a 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -52,6 +52,16 @@ class ServerType(Enum): SAAS = auto() # Server is SaaS +class LoginType(Enum): + """Types of login supported by Mergin Maps.""" + + PASSWORD = "password" # classic login with username and password + SSO = "sso" # login with SSO token + + def __str__(self) -> str: + return self.value + + def decode_token_data(token): token_prefix = "Bearer ." if not token.startswith(token_prefix): @@ -80,7 +90,16 @@ class MerginClient: Currently, only HTTP proxies are supported. """ - def __init__(self, url=None, auth_token=None, login=None, password=None, plugin_version=None, proxy_config=None): + def __init__( + self, + url=None, + auth_token=None, + login=None, + password=None, + plugin_version=None, + proxy_config=None, + login_type: LoginType = LoginType.PASSWORD, + ): self.url = (url if url is not None else MerginClient.default_url()).rstrip("/") + "/" self._auth_params = None self._auth_session = None @@ -88,6 +107,7 @@ def __init__(self, url=None, auth_token=None, login=None, password=None, plugin_ self._server_type = None self._server_version = None self.client_version = "Python-client/" + __version__ + self._login_type = login_type if plugin_version is not None: # this could be e.g. 
"Plugin/2020.1 QGIS/3.14" self.client_version += " " + plugin_version self.setup_logging() @@ -134,15 +154,20 @@ def __init__(self, url=None, auth_token=None, login=None, password=None, plugin_ self.opener = urllib.request.build_opener(*handlers, https_handler) urllib.request.install_opener(self.opener) - if login and not password: - raise ClientError("Unable to log in: no password provided for '{}'".format(login)) - if password and not login: - raise ClientError("Unable to log in: password provided but no username/email") + if self._login_type == LoginType.PASSWORD: + if login and not password: + raise ClientError("Unable to log in: no password provided for '{}'".format(login)) + if password and not login: + raise ClientError("Unable to log in: password provided but no username/email") + + if login and password: + self._auth_params = {"login": login, "password": password} + if not self._auth_session: + self.login(login, password) - if login and password: - self._auth_params = {"login": login, "password": password} + elif self._login_type == LoginType.SSO: if not self._auth_session: - self.login(login, password) + raise ClientError("Unable to log in: no auth token provided for SSO login") def setup_logging(self): """Setup Mergin Maps client logging.""" From 5484eee1bbc61a3fa52676edf33025af5de84ad0 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 16 May 2025 14:44:52 +0200 Subject: [PATCH 11/31] raise auth token error --- mergin/client.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 025e21a..3c3e642 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -45,6 +45,10 @@ class TokenError(Exception): pass +class AuthTokenExpiredError(Exception): + pass + + class ServerType(Enum): OLD = auto() # Server is old and does not support workspaces CE = auto() # Server is Community Edition @@ -214,12 +218,18 @@ def wrapper(self, *args): # Refresh auth token if it expired or will expire very 
soon delta = self._auth_session["expire"] - datetime.now(timezone.utc) if delta.total_seconds() < 5: - self.log.info("Token has expired - refreshing...") - self.login(self._auth_params["login"], self._auth_params["password"]) + if self._login_type == LoginType.PASSWORD: + self.log.info("Token has expired - refreshing...") + self.login(self._auth_params["login"], self._auth_params["password"]) + elif self._login_type == LoginType.SSO: + raise AuthTokenExpiredError("Token has expired - please re-login") else: # Create a new authorization token - self.log.info(f"No token - login user: {self._auth_params['login']}") - self.login(self._auth_params["login"], self._auth_params["password"]) + if self._login_type == LoginType.PASSWORD: + self.log.info(f"No token - login user: {self._auth_params['login']}") + self.login(self._auth_params["login"], self._auth_params["password"]) + elif self._login_type == LoginType.SSO: + raise AuthTokenExpiredError("Token has expired - please re-login") return f(self, *args) return wrapper From 4a57c86365a3ff9c1d78b9ea4c1bdc770dbe57b5 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 11:09:37 +0200 Subject: [PATCH 12/31] remove enum --- mergin/client.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 3c3e642..ad7aea8 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -56,16 +56,6 @@ class ServerType(Enum): SAAS = auto() # Server is SaaS -class LoginType(Enum): - """Types of login supported by Mergin Maps.""" - - PASSWORD = "password" # classic login with username and password - SSO = "sso" # login with SSO token - - def __str__(self) -> str: - return self.value - - def decode_token_data(token): token_prefix = "Bearer ." 
if not token.startswith(token_prefix): From 0b1049fdfe8a011db75b37a1eadf5071e572dd96 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 11:10:02 +0200 Subject: [PATCH 13/31] remove --- mergin/client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index ad7aea8..997f852 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -92,7 +92,6 @@ def __init__( password=None, plugin_version=None, proxy_config=None, - login_type: LoginType = LoginType.PASSWORD, ): self.url = (url if url is not None else MerginClient.default_url()).rstrip("/") + "/" self._auth_params = None @@ -101,7 +100,6 @@ def __init__( self._server_type = None self._server_version = None self.client_version = "Python-client/" + __version__ - self._login_type = login_type if plugin_version is not None: # this could be e.g. "Plugin/2020.1 QGIS/3.14" self.client_version += " " + plugin_version self.setup_logging() From be9c23b26ae4c234acfa6402660e591ff15738f4 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 11:10:49 +0200 Subject: [PATCH 14/31] update --- mergin/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 997f852..3074396 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -146,7 +146,7 @@ def __init__( self.opener = urllib.request.build_opener(*handlers, https_handler) urllib.request.install_opener(self.opener) - if self._login_type == LoginType.PASSWORD: + if login or password: if login and not password: raise ClientError("Unable to log in: no password provided for '{}'".format(login)) if password and not login: @@ -157,9 +157,9 @@ def __init__( if not self._auth_session: self.login(login, password) - elif self._login_type == LoginType.SSO: + else: if not self._auth_session: - raise ClientError("Unable to log in: no auth token provided for SSO login") + raise ClientError("Unable to log in: no auth token provided for login") def setup_logging(self): 
"""Setup Mergin Maps client logging.""" From 75059b99e720572bd7d775bd21c9bbb9220dede5 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 11:11:29 +0200 Subject: [PATCH 15/31] raise error --- mergin/client.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 3074396..9668f2f 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -206,18 +206,20 @@ def wrapper(self, *args): # Refresh auth token if it expired or will expire very soon delta = self._auth_session["expire"] - datetime.now(timezone.utc) if delta.total_seconds() < 5: - if self._login_type == LoginType.PASSWORD: + self.log.info("Token has expired - refreshing...") + if self._auth_params.get("login", None) and self._auth_params.get("password", None): self.log.info("Token has expired - refreshing...") self.login(self._auth_params["login"], self._auth_params["password"]) - elif self._login_type == LoginType.SSO: + else: raise AuthTokenExpiredError("Token has expired - please re-login") else: # Create a new authorization token - if self._login_type == LoginType.PASSWORD: - self.log.info(f"No token - login user: {self._auth_params['login']}") + self.log.info(f"No token - login user: {self._auth_params['login']}") + if self._auth_params.get("login", None) and self._auth_params.get("password", None): self.login(self._auth_params["login"], self._auth_params["password"]) - elif self._login_type == LoginType.SSO: - raise AuthTokenExpiredError("Token has expired - please re-login") + else: + raise ClientError("Missing login or password") + return f(self, *args) return wrapper From c42bcc7b84357c5d1cb7c69eebb5499c78e02a63 Mon Sep 17 00:00:00 2001 From: Alexandra Bucha Rasova <94905350+alex-cit@users.noreply.github.com> Date: Sun, 22 Jun 2025 07:16:23 +0200 Subject: [PATCH 16/31] Jupyter notebooks - Update README.md --- examples/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/README.md 
b/examples/README.md index b41a197..e4e953a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,10 +5,10 @@ Here you can find some tailored Jupyter Notebooks examples on how to interact wi These examples are split into scenarios in order to highlight some of the core capabilities of the Mergin Maps API. ## [Scenario 1](01_users.ipynb) - Users Management -On this scenario you'll create some random users from an existing CSV into Mergin Maps, change user's `ROLE` on a `WORKSPACE` and `PROJECT` perspective as well remove user's from a specific project. +In this scenario you'll create some random users from an existing CSV into Mergin Maps, change user's `ROLE` on a `WORKSPACE` and `PROJECT` perspective as well remove a user from a specific project. -## [Scenario 2](02_sync.ipynb) - Synchronization -On this scenario you'll learn on how to do basic synchronisation (`PUSH`, `PULL`) of projects with the Mergin Maps Python API client. +## [Scenario 2](02_sync.ipynb) - Synchronisation +In this scenario you'll learn how to do basic synchronisation (`PUSH`, `PULL`) of projects with the Mergin Maps Python API client. ## [Scenario 3](03_projects.ipynb) - Projects Management -On this scenario you'll learn how to manage project with the Mergin Maps Python API client, namely how to clone projects and how to separate team members in isolated projects from the cloned template. \ No newline at end of file +In this scenario you'll learn how to manage projects with Mergin Maps Python API client, namely how to clone projects and how to separate team members in isolated projects from the cloned template. 
From cd3b55a729ef2506ce98af56a62f0e209abce6df Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 13:54:24 +0200 Subject: [PATCH 17/31] allow client creation without authorization --- mergin/client.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 9668f2f..6cfaec1 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -157,10 +157,6 @@ def __init__( if not self._auth_session: self.login(login, password) - else: - if not self._auth_session: - raise ClientError("Unable to log in: no auth token provided for login") - def setup_logging(self): """Setup Mergin Maps client logging.""" client_log_file = os.environ.get("MERGIN_CLIENT_LOG", None) From 2c81a146e2760db190912d83623882c5693386b4 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 14:02:13 +0200 Subject: [PATCH 18/31] add test --- mergin/test/test_client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 88c7c31..2a2c7ab 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -2878,3 +2878,18 @@ def server_config(self): with pytest.raises(ClientError, match="The requested URL was not found on the server"): mc.send_logs(logs_path) + + +def test_mc_without_login(): + + # client without login should be able to access server config + mc = MerginClient(SERVER_URL) + config = mc.server_config() + assert config + assert isinstance(config, dict) + assert "server_configured" in config + assert config["server_configured"] + + # without login should not be able to access workspaces + with pytest.raises(ClientError, match="The requested URL was not found on the server"): + mc.workspaces_list() From 51127abe9fdcba5c9f55c96a5e26ff63bc646a58 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 14:34:47 +0200 Subject: [PATCH 19/31] simplify --- mergin/client.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git 
a/mergin/client.py b/mergin/client.py index 6cfaec1..bfac501 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -146,16 +146,15 @@ def __init__( self.opener = urllib.request.build_opener(*handlers, https_handler) urllib.request.install_opener(self.opener) - if login or password: - if login and not password: - raise ClientError("Unable to log in: no password provided for '{}'".format(login)) - if password and not login: - raise ClientError("Unable to log in: password provided but no username/email") - - if login and password: - self._auth_params = {"login": login, "password": password} - if not self._auth_session: - self.login(login, password) + if login and not password: + raise ClientError("Unable to log in: no password provided for '{}'".format(login)) + if password and not login: + raise ClientError("Unable to log in: password provided but no username/email") + + if login and password: + self._auth_params = {"login": login, "password": password} + if not self._auth_session: + self.login(login, password) def setup_logging(self): """Setup Mergin Maps client logging.""" From 296de651710185c3e8f09ffaa4500f8650b255e7 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 18:23:33 +0200 Subject: [PATCH 20/31] change error --- mergin/test/test_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 2a2c7ab..d27972f 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -10,6 +10,7 @@ import pytz import sqlite3 import glob +from urllib.error import HTTPError from .. 
import InvalidProject from ..client import ( @@ -2891,5 +2892,5 @@ def test_mc_without_login(): assert config["server_configured"] # without login should not be able to access workspaces - with pytest.raises(ClientError, match="The requested URL was not found on the server"): + with pytest.raises(HTTPError): mc.workspaces_list() From dfa886800f9c8d24becc60e0c506c2ead530515e Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Jun 2025 20:20:31 +0200 Subject: [PATCH 21/31] fix catch error --- mergin/test/test_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index d27972f..4ce5fd5 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -10,7 +10,6 @@ import pytz import sqlite3 import glob -from urllib.error import HTTPError from .. import InvalidProject from ..client import ( @@ -2892,5 +2891,5 @@ def test_mc_without_login(): assert config["server_configured"] # without login should not be able to access workspaces - with pytest.raises(HTTPError): + with pytest.raises(ClientError, match="Authentication information is missing or invalid."): mc.workspaces_list() From e175f0ba5172e95af35d0b762a7ab2c2a8cd5b83 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Thu, 26 Jun 2025 15:20:28 +0200 Subject: [PATCH 22/31] Add test for ChangesHandler --- mergin/client_push.py | 4 ++-- mergin/test/test_client.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index b506b2f..6e0c904 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -126,8 +126,8 @@ def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[ 1. Blocking: updated/removed and added files that are blocking 2. 
Non-blocking: added files that are not blocking """ - blocking_changes = {"added": [], "updated": [], "removed": []} - non_blocking_changes = {"added": [], "updated": [], "removed": []} + blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []} + non_blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []} for f in changes.get("added", []): if self.is_blocking_file(f): diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 4ce5fd5..aa31f8b 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -5,6 +5,7 @@ import tempfile import subprocess import shutil +from collections import defaultdict from datetime import datetime, timedelta, date import pytest import pytz @@ -2893,3 +2894,38 @@ def test_mc_without_login(): # without login should not be able to access workspaces with pytest.raises(ClientError, match="Authentication information is missing or invalid."): mc.workspaces_list() + +def sort_dict_of_files_by_path(d): + return { + k: sorted(v, key=lambda f: f["path"]) for k, v in d.items() + } + +def test_changes_handler(mc): + """ + Test methods of the ChangesHandler class + """ + # test _split_by_type + test_project = "test_changes_handler" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) + cleanup(mc, project, [project_dir]) + shutil.copytree(TEST_DATA_DIR, project_dir) + mc.create_project(project) + project_info = mc.project_info(project) + mp = MerginProject(project_dir) + mp.write_metadata(project_dir, project_info) + + mixed_changes = mp.get_push_changes() + changes_handler = ChangesHandler(mc, project_info, mixed_changes) + split_changes = changes_handler._split_by_type(mixed_changes) + assert len(split_changes) == 2 + # all blocking files in the first dict and no blocking file in the second dict + assert all(ChangesHandler.is_blocking_file(f) for files in split_changes[0].values() for f in files) + assert all(not 
ChangesHandler.is_blocking_file(f) for files in split_changes[1].values() for f in files) + # merge the split changes dicts back into a single dict and check files are the same + merged = defaultdict(list) + for d in split_changes: + for k, v in d.items(): + merged[k].extend(v) + assert sort_dict_of_files_by_path(merged) == sort_dict_of_files_by_path(mixed_changes) + From 513a06aeab3e0ec4b96ff2bc13c01e6f6df9534d Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Tue, 1 Jul 2025 09:38:48 +0200 Subject: [PATCH 23/31] Wait till change upload is done before uploading next change --- mergin/cli.py | 35 +++++---- mergin/client.py | 10 +-- mergin/client_pull.py | 2 +- mergin/client_push.py | 162 +++++++++++++++++++++--------------------- 4 files changed, 108 insertions(+), 101 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index d8b1b3a..8c12493 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -33,7 +33,7 @@ download_project_is_running, ) from mergin.client_pull import pull_project_async, pull_project_is_running, pull_project_finalize, pull_project_cancel -from mergin.client_push import push_project_async, push_project_is_running, push_project_finalize, push_project_cancel +from mergin.client_push import push_next_change, push_project_is_running, push_project_finalize, push_project_cancel from pygeodiff import GeoDiff @@ -412,18 +412,24 @@ def push(ctx): return directory = os.getcwd() try: - jobs = push_project_async(mc, directory) - for job in jobs: - if job is not None: # if job is none, we don't upload any files, and the transaction is finished already - with click.progressbar(length=job.total_size) as bar: - last_transferred_size = 0 - while push_project_is_running(job): - time.sleep(1 / 10) # 100ms - new_transferred_size = job.transferred_size - bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only - last_transferred_size = new_transferred_size - push_project_finalize(job) - click.echo("Done") + # keep going until 
there are no more changes + while True: + job = push_next_change(mc, directory) + if job is None: + click.echo("All changes uploaded.") + break + + # show progress for this single change upload + with click.progressbar(length=job.total_size, label="Uploading change") as bar: + last_transferred_size = 0 + while push_project_is_running(job): + time.sleep(1 / 10) # 100ms + new_transferred_size = job.transferred_size + bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only + last_transferred_size = new_transferred_size + # finalize this change upload (bump versions on server & locally) + push_project_finalize(job) + click.echo("Change pushed, checking for more…") except InvalidProject as e: click.secho("Invalid project directory ({})".format(str(e)), fg="red") except ClientError as e: @@ -431,8 +437,7 @@ def push(ctx): return except KeyboardInterrupt: click.secho("Cancelling...") - for job in jobs: - push_project_cancel(job) + push_project_cancel(job) except Exception as e: _print_unhandled_exception() diff --git a/mergin/client.py b/mergin/client.py index bfac501..783173a 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -33,7 +33,7 @@ download_diffs_finalize, ) from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize -from .client_push import push_project_async, push_project_wait, push_project_finalize +from .client_push import push_next_change, push_project_wait, push_project_finalize from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -897,10 +897,10 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - jobs = push_project_async(self, directory) - if not jobs: - return # there is nothing to push (or we only deleted some files) - for job in jobs: + while True: + job = push_next_change(self, directory) + if not job: + return # there is nothing to push (or we 
only deleted some files) push_project_wait(job) push_project_finalize(job) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index a46e6f9..45fd696 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -129,7 +129,7 @@ def download_project_async(mc, project_path, directory, project_version=None): """ if "/" not in project_path: - raise ClientError("Project name needs to be fully qualified, e.g. <username>/<project>") + raise ClientError("Project name needs to be fully qualified, e.g. <namespace>/<project>") if os.path.exists(directory): raise ClientError("Project directory already exists") os.makedirs(directory) diff --git a/mergin/client_push.py b/mergin/client_push.py index 6e0c904..e791b84 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -16,6 +16,7 @@ import concurrent.futures import os from typing import Dict, List, Optional +import click from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject @@ -154,13 +155,13 @@ def split(self) -> List[Dict[str, List[dict]]]: Applies all configured internal filters and returns a list of change ready to be uploaded. 
""" changes = self._filter_changes(self._raw_changes) - changes = self._split_by_type(changes) + changes_list = self._split_by_type(changes) # TODO: apply limits; changes = self._limit_by_file_count(changes) - return changes + return changes_list -def push_project_async(mc, directory) -> Optional[List[UploadJob]]: - """Starts push of a project and returns pending upload jobs""" +def push_next_change(mc, directory) -> Optional[UploadJob]: + """Starts push of a change of a project and returns pending upload job""" mp = MerginProject(directory) if mp.has_unfinished_pull(): @@ -197,85 +198,86 @@ def push_project_async(mc, directory) -> Optional[List[UploadJob]]: + f"\n\nLocal version: {local_version}\nServer version: {server_version}" ) - changes = mp.get_push_changes() - changes_handler = ChangesHandler(mc, project_info, changes) - changes_list = changes_handler.split() + all_changes = mp.get_push_changes() + changes_list = ChangesHandler(mc, project_info, all_changes).split() + if not changes_list: + return None - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - jobs = [] - - for changes in changes_list: - mp.log.debug("push changes:\n" + pprint.pformat(changes)) - - # If there are any versioned files (aka .gpkg) that are not updated through a diff, - # we need to make a temporary copy somewhere to be sure that we are uploading full content. - # That's because if there are pending transactions, checkpointing or switching from WAL mode - # won't work, and we would end up with some changes left in -wal file which do not get - # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. 
- for f in changes["updated"]: - if mp.is_versioned_file(f["path"]) and "diff" not in f: - mp.copy_versioned_file_for_upload(f, tmp_dir.name) - - for f in changes["added"]: - if mp.is_versioned_file(f["path"]): - mp.copy_versioned_file_for_upload(f, tmp_dir.name) - - if not any(len(v) for v in changes.values()): - mp.log.info(f"--- push {project_path} - nothing to do") - return + # take only the first change + change = changes_list[0] + mp.log.debug("push change:\n" + pprint.pformat(change)) - # drop internal info from being sent to server - for item in changes["updated"]: - item.pop("origin_checksum", None) - data = {"version": local_version, "changes": changes} + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - try: - resp = mc.post( - f"/v1/project/push/{project_path}", - data, - {"Content-Type": "application/json"}, - ) - except ClientError as err: - mp.log.error("Error starting transaction: " + str(err)) - mp.log.info("--- push aborted") - raise - server_resp = json.load(resp) - - upload_files = data["changes"]["added"] + data["changes"]["updated"] - - transaction_id = server_resp["transaction"] if upload_files else None - job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) - - if not upload_files: - mp.log.info("not uploading any files") - job.server_resp = server_resp - push_project_finalize(job) - return None # all done - no pending job - - mp.log.info(f"got transaction ID {transaction_id}") - - upload_queue_items = [] - total_size = 0 - # prepare file chunks for upload - for file in upload_files: - if "diff" in file: - # versioned file - uploading diff - file_location = mp.fpath_meta(file["diff"]["path"]) - file_size = file["diff"]["size"] - elif "upload_file" in file: - # versioned file - uploading full (a temporary copy) - file_location = file["upload_file"] - file_size = file["size"] - else: - # non-versioned file - file_location = mp.fpath(file["path"]) - file_size = file["size"] + # If there are any versioned 
files (aka .gpkg) that are not updated through a diff, + # we need to make a temporary copy somewhere to be sure that we are uploading full content. + # That's because if there are pending transactions, checkpointing or switching from WAL mode + # won't work, and we would end up with some changes left in -wal file which do not get + # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. + for f in change["updated"]: + if mp.is_versioned_file(f["path"]) and "diff" not in f: + mp.copy_versioned_file_for_upload(f, tmp_dir.name) + + for f in change["added"]: + if mp.is_versioned_file(f["path"]): + mp.copy_versioned_file_for_upload(f, tmp_dir.name) + + if not any(len(v) for v in change.values()): + mp.log.info(f"--- push {project_path} - nothing to do") + return - for chunk_index, chunk_id in enumerate(file["chunks"]): - size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) - upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) + # drop internal info from being sent to server + for item in change["updated"]: + item.pop("origin_checksum", None) + data = {"version": local_version, "changes": change} - total_size += file_size + try: + resp = mc.post( + f"/v1/project/push/{project_path}", + data, + {"Content-Type": "application/json"}, + ) + except ClientError as err: + mp.log.error("Error starting transaction: " + str(err)) + mp.log.info("--- push aborted") + raise + server_resp = json.load(resp) + + upload_files = data["changes"]["added"] + data["changes"]["updated"] + + transaction_id = server_resp["transaction"] if upload_files else None + job = UploadJob(project_path, change, transaction_id, mp, mc, tmp_dir) + + if not upload_files: + mp.log.info("not uploading any files") + job.server_resp = server_resp + push_project_finalize(job) + return None # all done - no pending job + + mp.log.info(f"got transaction ID {transaction_id}") + + upload_queue_items = [] + 
total_size = 0 + # prepare file chunks for upload + for file in upload_files: + if "diff" in file: + # versioned file - uploading diff + file_location = mp.fpath_meta(file["diff"]["path"]) + file_size = file["diff"]["size"] + elif "upload_file" in file: + # versioned file - uploading full (a temporary copy) + file_location = file["upload_file"] + file_size = file["size"] + else: + # non-versioned file + file_location = mp.fpath(file["path"]) + file_size = file["size"] + + for chunk_index, chunk_id in enumerate(file["chunks"]): + size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) + upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) + + total_size += file_size job.total_size = total_size job.upload_queue_items = upload_queue_items @@ -287,8 +289,8 @@ def push_project_async(mc, directory) -> Optional[List[UploadJob]]: for item in upload_queue_items: future = job.executor.submit(_do_upload, item, job) job.futures.append(future) - jobs.append(job) - return jobs + + return job def push_project_wait(job): From 4bfa65c086180c5ba5acdf742689151314d7df8a Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Wed, 9 Jul 2025 17:50:24 +0200 Subject: [PATCH 24/31] Syncer --- README.md | 1 + mergin/cli.py | 62 ++++++++++++---- mergin/client.py | 39 +++++++++-- mergin/client_push.py | 42 +++++------ mergin/client_sync.py | 81 +++++++++++++++++++++ mergin/merginproject.py | 8 ++- mergin/test/test_client.py | 140 +++++++++++++++++++++++++++++++++++-- 7 files changed, 327 insertions(+), 46 deletions(-) create mode 100644 mergin/client_sync.py diff --git a/README.md b/README.md index bb31546..010282a 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ Commands: show-file-history Display information about a single version of a... show-version Display information about a single version of a... status Show all changes in project files - upstream and... + sync Synchronize the project. 
``` ### Examples diff --git a/mergin/cli.py b/mergin/cli.py index 8c12493..5d890f0 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -33,8 +33,8 @@ download_project_is_running, ) from mergin.client_pull import pull_project_async, pull_project_is_running, pull_project_finalize, pull_project_cancel -from mergin.client_push import push_next_change, push_project_is_running, push_project_finalize, push_project_cancel - +from mergin.client_push import push_next_change, push_project_is_running, push_project_finalize, push_project_cancel, push_project_async +from mergin.client_sync import Syncer, calculate_uploads_size from pygeodiff import GeoDiff @@ -403,6 +403,47 @@ def status(ctx): pretty_summary(push_changes_summary) +@cli.command() +@click.pass_context +def sync(ctx): + """Synchronize the project. Pull latest project version from the server and push split changes.""" + mc = ctx.obj["client"] + if mc is None: + return + directory = os.getcwd() + syncer = Syncer(mc, directory) + size = syncer.estimate_total_upload() + if size == 0: + click.secho("Already up to date.", fg="green") + return + + try: + uploaded_so_far = 0 + + with click.progressbar(length=size, label="Uploading changes") as bar: + # updates the progress bar + def on_progress(last, now): + nonlocal uploaded_so_far + delta = now - last + uploaded_so_far += delta + bar.update(delta) + + # run pull → push cycles + syncer.sync_loop(progress_callback=on_progress) + + click.secho("Sync complete.", fg="green") + + except InvalidProject as e: + click.secho("Invalid project directory ({})".format(str(e)), fg="red") + except ClientError as e: + click.secho("Error: " + str(e), fg="red") + return + except KeyboardInterrupt: + click.secho("Cancelling...") + syncer.cancel() + except Exception as e: + _print_unhandled_exception() + @cli.command() @click.pass_context def push(ctx): @@ -412,24 +453,17 @@ def push(ctx): return directory = os.getcwd() try: - # keep going until there are no more changes - while True: - job = 
push_next_change(mc, directory) - if job is None: - click.echo("All changes uploaded.") - break - - # show progress for this single change upload - with click.progressbar(length=job.total_size, label="Uploading change") as bar: + job = push_project_async(mc, directory) + if job is not None: # if job is none, we don't upload any files, and the transaction is finished already + with click.progressbar(length=job.total_size) as bar: last_transferred_size = 0 while push_project_is_running(job): time.sleep(1 / 10) # 100ms new_transferred_size = job.transferred_size bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only last_transferred_size = new_transferred_size - # finalize this change upload (bump versions on server & locally) push_project_finalize(job) - click.echo("Change pushed, checking for more…") + click.secho("Done", fg="green") except InvalidProject as e: click.secho("Invalid project directory ({})".format(str(e)), fg="red") except ClientError as e: @@ -463,7 +497,7 @@ def pull(ctx): bar.update(new_transferred_size - last_transferred_size) # the update() needs increment only last_transferred_size = new_transferred_size pull_project_finalize(job) - click.echo("Done") + click.secho("Done", fg="green") except InvalidProject as e: click.secho("Invalid project directory ({})".format(str(e)), fg="red") except ClientError as e: diff --git a/mergin/client.py b/mergin/client.py index 783173a..f2a9ad7 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -19,6 +19,7 @@ from typing import List +from .client_sync import Syncer from .common import ClientError, LoginError, WorkspaceRole, ProjectRole, LOG_FILE_SIZE_TO_SEND, MERGIN_DEFAULT_LOGS_URL from .merginproject import MerginProject from .client_pull import ( @@ -33,7 +34,7 @@ download_diffs_finalize, ) from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize -from .client_push import push_next_change, push_project_wait, push_project_finalize +from 
.client_push import push_next_change, push_project_wait, push_project_finalize, push_project_async from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -890,6 +891,11 @@ def project_user_permissions(self, project_path): result["readers"] = access.get("readersnames", []) return result + + + + + def push_project(self, directory): """ Upload local changes to the repository. @@ -897,12 +903,12 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - while True: - job = push_next_change(self, directory) - if not job: - return # there is nothing to push (or we only deleted some files) - push_project_wait(job) - push_project_finalize(job) + # while True: + job = push_project_async(self, directory) + if not job: + return # there is nothing to push (or we only deleted some files) + push_project_wait(job) + push_project_finalize(job) def pull_project(self, directory): """ @@ -917,6 +923,25 @@ def pull_project(self, directory): pull_project_wait(job) return pull_project_finalize(job) + + def sync_project(self, directory): + + syncer = Syncer(self, directory) + syncer.sync_loop() + + # self.pull_project(directory) + # + # mp = MerginProject(directory) + # changes = mp.get_push_changes() + # + # if not changes: + # return + # + # push_changes(self, mp, changes) + # + # self.sync_project(directory) + + def clone_project(self, source_project_path, cloned_project_name, cloned_project_namespace=None): """ Clone project on server. 
diff --git a/mergin/client_push.py b/mergin/client_push.py index e791b84..47acc7d 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -15,8 +15,7 @@ import tempfile import concurrent.futures import os -from typing import Dict, List, Optional -import click +from typing import Dict, List, Optional, Tuple from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject @@ -27,7 +26,7 @@ class UploadJob: """Keeps all the important data about a pending upload job""" - def __init__(self, project_path, changes, transaction_id, mp, mc, tmp_dir): + def __init__(self, project_path, changes, transaction_id, mp, mc, tmp_dir, exclusive: bool): self.project_path = project_path # full project name ("username/projectname") self.changes = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server @@ -37,6 +36,7 @@ def __init__(self, project_path, changes, transaction_id, mp, mc, tmp_dir): self.mp = mp # MerginProject instance self.mc = mc # MerginClient instance self.tmp_dir = tmp_dir # TemporaryDirectory instance for any temp file we need + self.exclusive = exclusive # flag whether this upload blocks other uploads self.is_cancelled = False # whether upload has been cancelled self.executor = None # ThreadPoolExecutor that manages background upload tasks self.futures = [] # list of futures submitted to the executor @@ -160,7 +160,7 @@ def split(self) -> List[Dict[str, List[dict]]]: return changes_list -def push_next_change(mc, directory) -> Optional[UploadJob]: +def push_project_async(mc, directory) -> Optional[UploadJob]: """Starts push of a change of a project and returns pending upload job""" mp = MerginProject(directory) @@ -198,14 +198,8 @@ def push_next_change(mc, directory) -> Optional[UploadJob]: + f"\n\nLocal version: {local_version}\nServer version: {server_version}" ) - all_changes = mp.get_push_changes() - changes_list = ChangesHandler(mc, project_info, 
all_changes).split() - if not changes_list: - return None - - # take only the first change - change = changes_list[0] - mp.log.debug("push change:\n" + pprint.pformat(change)) + changes = change_batch or mp.get_push_changes() + mp.log.debug("push change:\n" + pprint.pformat(changes)) tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") @@ -214,22 +208,22 @@ def push_next_change(mc, directory) -> Optional[UploadJob]: # That's because if there are pending transactions, checkpointing or switching from WAL mode # won't work, and we would end up with some changes left in -wal file which do not get # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything. - for f in change["updated"]: + for f in changes["updated"]: if mp.is_versioned_file(f["path"]) and "diff" not in f: mp.copy_versioned_file_for_upload(f, tmp_dir.name) - for f in change["added"]: + for f in changes["added"]: if mp.is_versioned_file(f["path"]): mp.copy_versioned_file_for_upload(f, tmp_dir.name) - if not any(len(v) for v in change.values()): + if not any(len(v) for v in changes.values()): mp.log.info(f"--- push {project_path} - nothing to do") return # drop internal info from being sent to server - for item in change["updated"]: + for item in changes["updated"]: item.pop("origin_checksum", None) - data = {"version": local_version, "changes": change} + data = {"version": local_version, "changes": changes} try: resp = mc.post( @@ -246,7 +240,8 @@ def push_next_change(mc, directory) -> Optional[UploadJob]: upload_files = data["changes"]["added"] + data["changes"]["updated"] transaction_id = server_resp["transaction"] if upload_files else None - job = UploadJob(project_path, change, transaction_id, mp, mc, tmp_dir) + exclusive = server_resp.get("exclusive", True) + job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir, exclusive) if not upload_files: mp.log.info("not uploading any files") @@ -254,7 +249,7 @@ def push_next_change(mc, 
directory) -> Optional[UploadJob]: push_project_finalize(job) return None # all done - no pending job - mp.log.info(f"got transaction ID {transaction_id}") + mp.log.info(f"got transaction ID {transaction_id}, {'exclusive' if exclusive else 'non-exclusive'} upload") upload_queue_items = [] total_size = 0 @@ -347,7 +342,7 @@ def push_project_finalize(job): if with_upload_of_files: try: - job.mp.log.info(f"Finishing transaction {job.transaction_id}") + job.mp.log.info(f"Finishing {'exclusive' if job.exclusive else 'non-exclusive'} transaction {job.transaction_id}") resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) job.server_resp = json.load(resp) except ClientError as err: @@ -417,3 +412,10 @@ def remove_diff_files(job) -> None: diff_file = job.mp.fpath_meta(change["diff"]["path"]) if os.path.exists(diff_file): os.remove(diff_file) + +def get_next_batch(project_dir) -> Tuple[Dict[str], bool]: + """ + Return the next dictionary with changes, similar to changes[0] in push_project_async. 
+ """ + # TODO + return {"added": [], "updated": [], "removed": []}, True \ No newline at end of file diff --git a/mergin/client_sync.py b/mergin/client_sync.py new file mode 100644 index 0000000..8ba2d1e --- /dev/null +++ b/mergin/client_sync.py @@ -0,0 +1,81 @@ +import time +from typing import Dict, Any + +from .merginproject import MerginProject +from .client_pull import DownloadJob, pull_project_async, pull_project_wait, pull_project_finalize, pull_project_cancel +from .client_push import UploadJob, push_project_async, push_project_wait, push_project_finalize, push_next_change, \ + push_project_cancel, push_project_is_running + + +class Syncer: + def __init__(self, client, directory, download_job: DownloadJob = None, upload_job: UploadJob = None, total_upload_size: int = 0, uploaded_size: int = 0): + self.client = client + self.directory = directory + self.mp = MerginProject(directory) + self.download_job = download_job + self.upload_job = upload_job + self.total_upload_size = total_upload_size + self.uploaded_size = uploaded_size + + def _one_cycle(self): + """Pull remote, then kick off an upload (if there are changes). + Returns an UploadJob (with .transferred_size) or None if up‑to‑date.""" + download_job = pull_project_async(self.client, self.directory) + if download_job: + pull_project_wait(download_job) + pull_project_finalize(download_job) + + changes = self.mp.get_push_changes() + if not changes: + return None + + upload_job = push_next_change(self.client, self.mp, changes) + return upload_job + + def sync_loop(self, progress_callback=None): + """ + Repeatedly do pull → push cycles until no more changes. 
+ If progress_callback is provided, it’s called as: + progress_callback(upload_job, last_transferred, new_transferred) + """ + while True: + upload_job = self._one_cycle() + if upload_job is None: + break + + # no UI: just wait & finalize + if progress_callback is None: + push_project_wait(upload_job) + push_project_finalize(upload_job) + continue + + # drive the callback + last = 0 + while push_project_is_running(upload_job): + time.sleep(0.1) + now = upload_job.transferred_size + progress_callback(upload_job, last, now) + last = now + + push_project_wait(upload_job) + push_project_finalize(upload_job) + + def cancel(self): + if self.download_job is not None: + pull_project_cancel(self.download_job) + if self.upload_job is not None: + push_project_cancel(self.upload_job) + + def estimate_total_upload(self): + """ + One‑shot estimate of the _current_ total upload size. + """ + changes = self.mp.get_push_changes() + return calculate_uploads_size(changes) if changes else 0 + +def calculate_uploads_size(changes: Dict[str, Any]) -> int: + files = changes.get("added", []) + changes.get("updated", []) + return sum( + f.get("diff", {}).get("size", f.get("size", 0)) + for f in files + ) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 5e46ac7..0aa1516 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -1,5 +1,7 @@ import json import logging +import pprint + import math import os import re @@ -7,7 +9,7 @@ import uuid import tempfile from datetime import datetime -from typing import List, Dict +from typing import List, Dict, Tuple from dateutil.tz import tzlocal from .editor import prevent_conflicted_copy @@ -462,6 +464,10 @@ def get_push_changes(self) -> Dict[str, List[dict]]: pass changes["updated"] = [f for f in changes["updated"] if f not in not_updated] + if changes: + self.log.debug(f"All local changes:\n" + pprint.pformat(changes)) + else: + self.log.debug("No local changes. 
Nothing to upload.") return changes def copy_versioned_file_for_upload(self, f, tmp_dir): diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index aa31f8b..ea9df38 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1,3 +1,4 @@ +import concurrent.futures import json import logging import os @@ -5,8 +6,11 @@ import tempfile import subprocess import shutil +import time from collections import defaultdict from datetime import datetime, timedelta, date +from unittest.mock import patch + import pytest import pytz import sqlite3 @@ -22,7 +26,7 @@ TokenError, ServerType, ) -from ..client_push import push_project_async, push_project_cancel, ChangesHandler +from ..client_push import push_next_change, push_project_cancel, ChangesHandler, _do_upload from ..client_pull import ( download_project_async, download_project_wait, @@ -394,7 +398,7 @@ def test_cancel_push(mc): assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) # start pushing and then cancel the job - jobs = push_project_async(mc, project_dir) + jobs = push_changes(mc, project_dir) for job in jobs: push_project_cancel(job) @@ -2895,7 +2899,7 @@ def test_mc_without_login(): with pytest.raises(ClientError, match="Authentication information is missing or invalid."): mc.workspaces_list() -def sort_dict_of_files_by_path(d): +def _sort_dict_of_files_by_path(d): return { k: sorted(v, key=lambda f: f["path"]) for k, v in d.items() } @@ -2927,5 +2931,133 @@ def test_changes_handler(mc): for d in split_changes: for k, v in d.items(): merged[k].extend(v) - assert sort_dict_of_files_by_path(merged) == sort_dict_of_files_by_path(mixed_changes) + assert _sort_dict_of_files_by_path(merged) == _sort_dict_of_files_by_path(mixed_changes) + + +# import sqlite3 +# import os + +def inflate_gpkg(path, blob_size_bytes=1024*1024, rows=50): + """ + Append a table named 'inflate' to the GeoPackage at `path`, + then insert `rows` rows each containing a BLOB of size 
`blob_size_bytes`. + This will grow the file by roughly rows * blob_size_bytes. + """ + # make sure file exists + if not os.path.exists(path): + raise FileNotFoundError(path) + con = sqlite3.connect(path) + cur = con.cursor() + # 1) create the dummy table if it doesn't already exist + cur.execute(""" + CREATE TABLE IF NOT EXISTS inflate ( + id INTEGER PRIMARY KEY, + data BLOB NOT NULL + ); + """) + # 2) prepare one blob of the given size + dummy_blob = sqlite3.Binary(b'\x00' * blob_size_bytes) + # 3) insert a bunch of rows + for _ in range(rows): + cur.execute("INSERT INTO inflate (data) VALUES (?);", (dummy_blob,)) + con.commit() + con.close() + + +# def _make_slow_upload(delay: float): +# """ +# Helper to mock up a slow upload +# """ +# def slow_upload(item, job): +# time.sleep(delay) # delay in seconds for each chunk upload +# return _do_upload(item, job) +# return slow_upload +# +# +# def _delayed_push(mc: MerginClient, directory: str, delay: float): +# """ +# Patches chunks upload during project push +# """ +# with patch("mergin.client_push._do_upload", side_effect=_make_slow_upload(delay)): +# return mc.push_project(directory) + + + +files_to_push = [ + # ("base.gpkg", "inserted_1_A.gpkg", False), # both pushes are exclusive, the latter one is refused + ("inserted_1_A.gpkg", "test.txt", True), # the second push is non-exclusive - it is free to go + # ("test3.txt", "inserted_1_A_mod.gpkg", True), # the first push is non-exclusive - it does not block other pushes +] + +@pytest.mark.parametrize("file1,file2,success", files_to_push) +def test_exclusive_upload(mc, mc2, file1, file2, success): + """ + Test two clients pushing at the same time + """ + test_project_name = "test_exclusive_upload" + project_dir1 = os.path.join(TMP_DIR, test_project_name + "_1") + project_dir2 = os.path.join(TMP_DIR, test_project_name + "_2") + + project_full_name = API_USER + "/" + test_project_name + cleanup(mc, project_full_name, [project_dir1, project_dir2]) + 
mc.create_project(test_project_name)
+    project_info = mc.project_info(project_full_name)
+    mc.download_project(project_full_name, project_dir1)
+    mc.add_project_collaborator(project_info["id"], API_USER2, ProjectRole.WRITER)
+    mc2.download_project(project_full_name, project_dir2)
+
+    def push1():
+        mc.push_project(project_dir1)
+
+    def push2():
+        mc2.push_project(project_dir2)
+
+    # with open(os.path.join(project_dir1, file1), "wb") as f:
+    #     f.write(os.urandom(50 * 1024 * 1024))  # 50 MB
+    shutil.copy(os.path.join(TEST_DATA_DIR, file1), project_dir1)
+    shutil.copy(os.path.join(TEST_DATA_DIR, file2), project_dir2)
+    big_gpkg = os.path.join(project_dir1, file1)
+    # this will add ~50 MB of zero‐bytes to the file
+    inflate_gpkg(big_gpkg, blob_size_bytes=1_000_000, rows=50)
+
+    # first_upload_delay = 2
+    # resp1 = _delayed_push(mc, project_dir1, first_upload_delay)
+    # resp2 = _delayed_push(mc, project_dir2, 0)
+    # if not success:
+    #     resp1.
+
+    # run both pushes concurrently
+    # with patch("mergin.client_push._do_upload", side_effect=_slow_upload):
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future1 = executor.submit(push1)
+        future2 = executor.submit(push2)
+        # first_upload_delay = 2
+        # future1 = executor.submit(_delayed_push, mc, project_dir1, first_upload_delay)
+        # future2 = executor.submit(_delayed_push, mc2, project_dir2, 0)
+        # time.sleep(first_upload_delay + 0.2)
+        # assert not future1.exception()
+        exc2 = future2.exception()
+        exc1 = future1.exception()
+        # assert not exc2
+
+    if not success:
+        error = exc1 if exc1 else exc2  # one of the uploads was lucky to pass; the other was slow
+        assert (exc1 is None or exc2 is None)
+        assert isinstance(error, ClientError)
+        assert error.detail == "Another process is running. Please try later."
+
+        # assert type(exc1) is ClientError
+        # assert exc1.http_error == 400
+        # assert exc1.detail == "Another process is running. Please try later."
+ else: + # assert not exc1 + assert not (exc1 or exc2) + # assert (exc1 is None and isinstance(exc2, ClientError) or (exc2 is None and isinstance(exc1, ClientError))) + # if not success: + # assert type(exc2) is ClientError + # assert exc2.http_error == 400 + # assert exc2.detail == "Another process is running. Please try later." + # else: + # assert not exc2 + From d74c7b391f0a9830662c09cec9a4d58022b2fb49 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Thu, 10 Jul 2025 08:24:47 +0200 Subject: [PATCH 25/31] sync methods --- mergin/cli.py | 14 +++---- mergin/client.py | 79 +++++++++++++++++++++++++++++----------- mergin/client_push.py | 38 ++++++++++++++----- mergin/client_sync.py | 81 ----------------------------------------- mergin/merginproject.py | 2 +- 5 files changed, 94 insertions(+), 120 deletions(-) delete mode 100644 mergin/client_sync.py diff --git a/mergin/cli.py b/mergin/cli.py index 5d890f0..c3e5c76 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -33,8 +33,7 @@ download_project_is_running, ) from mergin.client_pull import pull_project_async, pull_project_is_running, pull_project_finalize, pull_project_cancel -from mergin.client_push import push_next_change, push_project_is_running, push_project_finalize, push_project_cancel, push_project_async -from mergin.client_sync import Syncer, calculate_uploads_size +from mergin.client_push import push_project_is_running, push_project_finalize, push_project_cancel, push_project_async, total_upload_size from pygeodiff import GeoDiff @@ -411,8 +410,8 @@ def sync(ctx): if mc is None: return directory = os.getcwd() - syncer = Syncer(mc, directory) - size = syncer.estimate_total_upload() + + size = total_upload_size(directory) if size == 0: click.secho("Already up to date.", fg="green") return @@ -428,8 +427,8 @@ def on_progress(last, now): uploaded_so_far += delta bar.update(delta) - # run pull → push cycles - syncer.sync_loop(progress_callback=on_progress) + # run pull & push cycles + 
mc.sync_project_with_callback(directory, progress_callback=on_progress) click.secho("Sync complete.", fg="green") @@ -440,7 +439,8 @@ def on_progress(last, now): return except KeyboardInterrupt: click.secho("Cancelling...") - syncer.cancel() + push_project_cancel(mc.upload_job) + pull_project_cancel(mc.download_job) except Exception as e: _print_unhandled_exception() diff --git a/mergin/client.py b/mergin/client.py index f2a9ad7..6957fa9 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1,4 +1,6 @@ import logging +from time import sleep + import math import os import json @@ -19,7 +21,6 @@ from typing import List -from .client_sync import Syncer from .common import ClientError, LoginError, WorkspaceRole, ProjectRole, LOG_FILE_SIZE_TO_SEND, MERGIN_DEFAULT_LOGS_URL from .merginproject import MerginProject from .client_pull import ( @@ -32,9 +33,13 @@ download_project_finalize, download_project_wait, download_diffs_finalize, + pull_project_is_running, + pull_project_async, + pull_project_wait, + pull_project_finalize ) -from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize -from .client_push import push_next_change, push_project_wait, push_project_finalize, push_project_async +from .client_push import push_project_wait, push_project_finalize, push_project_async, push_project_is_running, \ + ChangesHandler from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -104,6 +109,8 @@ def __init__( if plugin_version is not None: # this could be e.g. 
"Plugin/2020.1 QGIS/3.14" self.client_version += " " + plugin_version self.setup_logging() + self.pull_job = None + self.push_job = None if auth_token: try: token_data = decode_token_data(auth_token) @@ -891,11 +898,6 @@ def project_user_permissions(self, project_path): result["readers"] = access.get("readersnames", []) return result - - - - - def push_project(self, directory): """ Upload local changes to the repository. @@ -923,23 +925,56 @@ def pull_project(self, directory): pull_project_wait(job) return pull_project_finalize(job) + def sync_project(self, project_dir): + """ + Syncs project by loop with these steps: + 1. Pull server version + 2. Get local changes + 3. Push first change batch + Repeat if there are more changes pending. + """ + has_more_changes = True + while has_more_changes: + self.pull_job = pull_project_async(self, project_dir) + pull_project_wait(self.pull_job) + pull_project_finalize(self.pull_job) + + changes_handler = ChangesHandler(self, project_dir) + changes_batch, has_more_changes = changes_handler.get_change_batch() + + self.push_job = push_project_async(project_dir, changes_batch) + push_project_wait(self.push_job) + push_project_finalize(self.push_job) + + def sync_project_with_callback(self, project_dir, progress_callback=None, sleep_time=100): + """ + Syncs project while sending push progress info as callback. + Sync is done in this loop: + Pending changes? 
-> Pull -> Push change batch -> repeat + """ + has_more_changes = True + while has_more_changes: + changes_handler = ChangesHandler(self, project_dir) + + self.pull_job = pull_project_async(self, project_dir) + if self.pull_job is None: + return + pull_project_wait(self.pull_job) + pull_project_finalize(self.pull_job) - def sync_project(self, directory): + changes_batch, has_more_changes = changes_handler.get_change_batch() - syncer = Syncer(self, directory) - syncer.sync_loop() + self.push_job = push_project_async(self, project_dir, changes_batch) + if not self.push_job: + return - # self.pull_project(directory) - # - # mp = MerginProject(directory) - # changes = mp.get_push_changes() - # - # if not changes: - # return - # - # push_changes(self, mp, changes) - # - # self.sync_project(directory) + last = 0 + while push_project_is_running(self.push_job): + sleep(sleep_time) + now = self.push_job.transferred_size + progress_callback(last, now) + last = now + push_project_finalize(self.push_job) def clone_project(self, source_project_path, cloned_project_name, cloned_project_namespace=None): diff --git a/mergin/client_push.py b/mergin/client_push.py index 47acc7d..764cf04 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -96,10 +96,10 @@ class ChangesHandler: - Generating upload-ready change groups for asynchronous job creation. """ - def __init__(self, client, project_info, changes): + def __init__(self, client, mp): self.client = client - self.project_info = project_info - self._raw_changes = changes + self.mp = MerginProject(mp) + self._raw_changes = self.mp.get_push_changes() @staticmethod def is_blocking_file(file): @@ -117,7 +117,12 @@ def _filter_changes(self, changes: Dict[str, List[dict]]) -> Dict[str, List[dict Returns: dict[str, list[dict]]: The filtered changes dictionary. 
""" - if not is_editor_enabled(self.client, self.project_info): + project_name = self.mp.project_full_name() + try: + project_info = self.client.project_info(project_name) + except Exception as e: + self.mp.log.error(f"Failed to get project info for project {project_name}: {e}") + if not is_editor_enabled(self.client, project_info): return changes return _apply_editor_filters(changes) @@ -159,8 +164,17 @@ def split(self) -> List[Dict[str, List[dict]]]: # TODO: apply limits; changes = self._limit_by_file_count(changes) return changes_list + def get_change_batch(self) -> Tuple[Optional[Dict[str, List[dict]]], bool]: + """ + Return the next changes dictionary and flag if there are more changes + """ + changes_list = self.split() + if not changes_list: + return None, False + return changes_list[0], len(changes_list) > 1 + -def push_project_async(mc, directory) -> Optional[UploadJob]: +def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: """Starts push of a change of a project and returns pending upload job""" mp = MerginProject(directory) @@ -413,9 +427,15 @@ def remove_diff_files(job) -> None: if os.path.exists(diff_file): os.remove(diff_file) -def get_next_batch(project_dir) -> Tuple[Dict[str], bool]: + +def total_upload_size(directory) -> int: """ - Return the next dictionary with changes, similar to changes[0] in push_project_async. + Total up the number of bytes that need to be uploaded. 
""" - # TODO - return {"added": [], "updated": [], "removed": []}, True \ No newline at end of file + mp = MerginProject(directory) + changes = mp.get_push_changes() + files = changes.get("added", []) + changes.get("updated", []) + return sum( + f.get("diff", {}).get("size", f.get("size", 0)) + for f in files + ) \ No newline at end of file diff --git a/mergin/client_sync.py b/mergin/client_sync.py deleted file mode 100644 index 8ba2d1e..0000000 --- a/mergin/client_sync.py +++ /dev/null @@ -1,81 +0,0 @@ -import time -from typing import Dict, Any - -from .merginproject import MerginProject -from .client_pull import DownloadJob, pull_project_async, pull_project_wait, pull_project_finalize, pull_project_cancel -from .client_push import UploadJob, push_project_async, push_project_wait, push_project_finalize, push_next_change, \ - push_project_cancel, push_project_is_running - - -class Syncer: - def __init__(self, client, directory, download_job: DownloadJob = None, upload_job: UploadJob = None, total_upload_size: int = 0, uploaded_size: int = 0): - self.client = client - self.directory = directory - self.mp = MerginProject(directory) - self.download_job = download_job - self.upload_job = upload_job - self.total_upload_size = total_upload_size - self.uploaded_size = uploaded_size - - def _one_cycle(self): - """Pull remote, then kick off an upload (if there are changes). - Returns an UploadJob (with .transferred_size) or None if up‑to‑date.""" - download_job = pull_project_async(self.client, self.directory) - if download_job: - pull_project_wait(download_job) - pull_project_finalize(download_job) - - changes = self.mp.get_push_changes() - if not changes: - return None - - upload_job = push_next_change(self.client, self.mp, changes) - return upload_job - - def sync_loop(self, progress_callback=None): - """ - Repeatedly do pull → push cycles until no more changes. 
- If progress_callback is provided, it’s called as: - progress_callback(upload_job, last_transferred, new_transferred) - """ - while True: - upload_job = self._one_cycle() - if upload_job is None: - break - - # no UI: just wait & finalize - if progress_callback is None: - push_project_wait(upload_job) - push_project_finalize(upload_job) - continue - - # drive the callback - last = 0 - while push_project_is_running(upload_job): - time.sleep(0.1) - now = upload_job.transferred_size - progress_callback(upload_job, last, now) - last = now - - push_project_wait(upload_job) - push_project_finalize(upload_job) - - def cancel(self): - if self.download_job is not None: - pull_project_cancel(self.download_job) - if self.upload_job is not None: - push_project_cancel(self.upload_job) - - def estimate_total_upload(self): - """ - One‑shot estimate of the _current_ total upload size. - """ - changes = self.mp.get_push_changes() - return calculate_uploads_size(changes) if changes else 0 - -def calculate_uploads_size(changes: Dict[str, Any]) -> int: - files = changes.get("added", []) + changes.get("updated", []) - return sum( - f.get("diff", {}).get("size", f.get("size", 0)) - for f in files - ) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 0aa1516..cd9ddc0 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -9,7 +9,7 @@ import uuid import tempfile from datetime import datetime -from typing import List, Dict, Tuple +from typing import List, Dict from dateutil.tz import tzlocal from .editor import prevent_conflicted_copy From da3950752c39489ae7493e55462d86798028c3f5 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 07:39:40 +0200 Subject: [PATCH 26/31] Cleanup and docstrings --- README.md | 2 +- mergin/cli.py | 26 ++++------ mergin/client.py | 58 ++++++++++++----------- mergin/client_push.py | 102 ++++++++++++++++++++++------------------ mergin/merginproject.py | 4 -- 5 files changed, 97 insertions(+), 95 deletions(-) diff 
--git a/README.md b/README.md index 010282a..4ccf251 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ Commands: show-file-history Display information about a single version of a... show-version Display information about a single version of a... status Show all changes in project files - upstream and... - sync Syncronize the project. + sync Synchronize the project. ``` ### Examples diff --git a/mergin/cli.py b/mergin/cli.py index c3e5c76..a3e3872 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -411,23 +411,13 @@ def sync(ctx): return directory = os.getcwd() - size = total_upload_size(directory) - if size == 0: - click.secho("Already up to date.", fg="green") - return - try: - uploaded_so_far = 0 - - with click.progressbar(length=size, label="Uploading changes") as bar: - # updates the progress bar - def on_progress(last, now): - nonlocal uploaded_so_far - delta = now - last - uploaded_so_far += delta - bar.update(delta) + size = total_upload_size(directory) + with click.progressbar(length=size, label="Syncing") as bar: + def on_progress(increment): + bar.update(increment) - # run pull & push cycles + # run pull & push cycles until there are no local changes mc.sync_project_with_callback(directory, progress_callback=on_progress) click.secho("Sync complete.", fg="green") @@ -439,8 +429,10 @@ def on_progress(last, now): return except KeyboardInterrupt: click.secho("Cancelling...") - push_project_cancel(mc.upload_job) - pull_project_cancel(mc.download_job) + if mc.pull_job: + pull_project_cancel(mc.pull_job) + if mc.push_job: + push_project_cancel(mc.push_job) except Exception as e: _print_unhandled_exception() diff --git a/mergin/client.py b/mergin/client.py index 6957fa9..51ce95b 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -33,13 +33,12 @@ download_project_finalize, download_project_wait, download_diffs_finalize, - pull_project_is_running, pull_project_async, pull_project_wait, pull_project_finalize ) from .client_push import push_project_wait, 
push_project_finalize, push_project_async, push_project_is_running, \ - ChangesHandler + ChangesHandler, get_change_batch from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -936,45 +935,48 @@ def sync_project(self, project_dir): has_more_changes = True while has_more_changes: self.pull_job = pull_project_async(self, project_dir) - pull_project_wait(self.pull_job) - pull_project_finalize(self.pull_job) + if self.pull_job is not None: + pull_project_wait(self.pull_job) + pull_project_finalize(self.pull_job) - changes_handler = ChangesHandler(self, project_dir) - changes_batch, has_more_changes = changes_handler.get_change_batch() + changes_batch, has_more_changes = get_change_batch(self, project_dir) + if not changes_batch: # no changes to upload, quit sync + return - self.push_job = push_project_async(project_dir, changes_batch) - push_project_wait(self.push_job) - push_project_finalize(self.push_job) + self.push_job = push_project_async(self, project_dir, changes_batch) + if self.push_job: + push_project_wait(self.push_job) + push_project_finalize(self.push_job) - def sync_project_with_callback(self, project_dir, progress_callback=None, sleep_time=100): + def sync_project_with_callback(self, project_dir, progress_callback=None, sleep_time=0.1): """ Syncs project while sending push progress info as callback. Sync is done in this loop: - Pending changes? -> Pull -> Push change batch -> repeat + Pending changes? 
-> Pull -> Get changes batch -> Push the changes -> repeat + + :param progress_callback: updates the progress bar in CLI, on_progress(increment) + :param sleep_time: sleep time between calling the callback function """ has_more_changes = True while has_more_changes: - changes_handler = ChangesHandler(self, project_dir) - self.pull_job = pull_project_async(self, project_dir) - if self.pull_job is None: - return - pull_project_wait(self.pull_job) - pull_project_finalize(self.pull_job) - - changes_batch, has_more_changes = changes_handler.get_change_batch() + if self.pull_job is not None: + pull_project_wait(self.pull_job) + pull_project_finalize(self.pull_job) - self.push_job = push_project_async(self, project_dir, changes_batch) - if not self.push_job: + changes_batch, has_more_changes = get_change_batch(self, project_dir) + if not changes_batch: # no changes to upload, quit sync return - last = 0 - while push_project_is_running(self.push_job): - sleep(sleep_time) - now = self.push_job.transferred_size - progress_callback(last, now) - last = now - push_project_finalize(self.push_job) + self.push_job = push_project_async(self, project_dir, changes_batch) + if self.push_job: + last = 0 + while push_project_is_running(self.push_job): + sleep(sleep_time) + now = self.push_job.transferred_size + progress_callback(now - last) # update progressbar with transferred size increment + last = now + push_project_finalize(self.push_job) def clone_project(self, source_project_path, cloned_project_name, cloned_project_namespace=None): diff --git a/mergin/client_push.py b/mergin/client_push.py index 764cf04..f3662a3 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -132,8 +132,8 @@ def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[ 1. Blocking: updated/removed and added files that are blocking 2. 
Non-blocking: added files that are not blocking """ - blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []} - non_blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []} + blocking_changes = {"added": [], "updated": [], "removed": []} + non_blocking_changes = {"added": [], "updated": [], "removed": []} for f in changes.get("added", []): if self.is_blocking_file(f): @@ -164,18 +164,26 @@ def split(self) -> List[Dict[str, List[dict]]]: # TODO: apply limits; changes = self._limit_by_file_count(changes) return changes_list - def get_change_batch(self) -> Tuple[Optional[Dict[str, List[dict]]], bool]: - """ - Return the next changes dictionary and flag if there are more changes - """ - changes_list = self.split() - if not changes_list: - return None, False - return changes_list[0], len(changes_list) > 1 +def get_change_batch(mc, project_dir) -> Tuple[Optional[Dict[str, List[dict]]], bool]: + """ + Return the next changes dictionary and flag if there are more changes (to be uploaded in the next upload job) + """ + changes_list = ChangesHandler(mc, project_dir).split() + if not changes_list: + return None, False + non_empty_length = sum(any(v for v in d.values()) for d in changes_list) + return changes_list[0], non_empty_length > 1 def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: - """Starts push of a change of a project and returns pending upload job""" + """ + Starts push in background and returns pending upload job. + Pushes all project changes unless change_batch is provided. + When specific change is provided, initial version check is skipped (the pull has just been done). + + :param change_batch: A dictionary of changes that was split to blocking and non-blocking. + Pushing only non-blocking changes results in non-exclusive upload which server allows to be concurrent. 
+ """ mp = MerginProject(directory) if mp.has_unfinished_pull(): @@ -187,30 +195,32 @@ def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: mp.log.info("--- version: " + mc.user_agent_info()) mp.log.info(f"--- start push {project_path}") - try: - project_info = mc.project_info(project_path) - except ClientError as err: - mp.log.error("Error getting project info: " + str(err)) - mp.log.info("--- push aborted") - raise - server_version = project_info["version"] if project_info["version"] else "v0" - - mp.log.info(f"got project info: local version {local_version} / server version {server_version}") - - username = mc.username() - # permissions field contains information about update, delete and upload privileges of the user - # on a specific project. This is more accurate information than "writernames" field, as it takes - # into account namespace privileges. So we have to check only "permissions", namely "upload" once - if not mc.has_writing_permissions(project_path): - mp.log.error(f"--- push {project_path} - username {username} does not have write access") - raise ClientError(f"You do not seem to have write access to the project (username '{username}')") - - if local_version != server_version: - mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") - raise ClientError( - "There is a new version of the project on the server. Please update your local copy." 
- + f"\n\nLocal version: {local_version}\nServer version: {server_version}" - ) + # if we have specific change to push we don't need version check + if not change_batch: + try: + project_info = mc.project_info(project_path) + except ClientError as err: + mp.log.error("Error getting project info: " + str(err)) + mp.log.info("--- push aborted") + raise + server_version = project_info["version"] if project_info["version"] else "v0" + + mp.log.info(f"got project info: local version {local_version} / server version {server_version}") + + username = mc.username() + # permissions field contains information about update, delete and upload privileges of the user + # on a specific project. This is more accurate information than "writernames" field, as it takes + # into account namespace privileges. So we have to check only "permissions", namely "upload" once + if not mc.has_writing_permissions(project_path): + mp.log.error(f"--- push {project_path} - username {username} does not have write access") + raise ClientError(f"You do not seem to have write access to the project (username '{username}')") + + if local_version != server_version: + mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") + raise ClientError( + "There is a new version of the project on the server. Please update your local copy." 
+ + f"\n\nLocal version: {local_version}\nServer version: {server_version}" + ) changes = change_batch or mp.get_push_changes() mp.log.debug("push change:\n" + pprint.pformat(changes)) @@ -288,16 +298,16 @@ def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: total_size += file_size - job.total_size = total_size - job.upload_queue_items = upload_queue_items + job.total_size = total_size + job.upload_queue_items = upload_queue_items - mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") + mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") - # start uploads in background - job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - for item in upload_queue_items: - future = job.executor.submit(_do_upload, item, job) - job.futures.append(future) + # start uploads in background + job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) + for item in upload_queue_items: + future = job.executor.submit(_do_upload, item, job) + job.futures.append(future) return job @@ -435,7 +445,9 @@ def total_upload_size(directory) -> int: mp = MerginProject(directory) changes = mp.get_push_changes() files = changes.get("added", []) + changes.get("updated", []) - return sum( + size = sum( f.get("diff", {}).get("size", f.get("size", 0)) for f in files - ) \ No newline at end of file + ) + mp.log.info(f"Upload size of all files is {size}") + return size diff --git a/mergin/merginproject.py b/mergin/merginproject.py index cd9ddc0..b423abe 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -464,10 +464,6 @@ def get_push_changes(self) -> Dict[str, List[dict]]: pass changes["updated"] = [f for f in changes["updated"] if f not in not_updated] - if changes: - self.log.debug(f"All local changes:\n" + pprint.pformat(changes)) - else: - self.log.debug("No local changes. 
Nothing to upload.") return changes def copy_versioned_file_for_upload(self, f, tmp_dir): From 5ba7535b72c5c7d4bec780fa781a42dc86d4aa73 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 09:16:18 +0200 Subject: [PATCH 27/31] formatting --- mergin/cli.py | 10 ++- mergin/client.py | 14 ++-- mergin/client_push.py | 12 ++-- mergin/test/test_client.py | 129 ++++++++++++++++--------------------- 4 files changed, 78 insertions(+), 87 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index a3e3872..3eb80d9 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -33,7 +33,13 @@ download_project_is_running, ) from mergin.client_pull import pull_project_async, pull_project_is_running, pull_project_finalize, pull_project_cancel -from mergin.client_push import push_project_is_running, push_project_finalize, push_project_cancel, push_project_async, total_upload_size +from mergin.client_push import ( + push_project_is_running, + push_project_finalize, + push_project_cancel, + push_project_async, + total_upload_size, +) from pygeodiff import GeoDiff @@ -414,6 +420,7 @@ def sync(ctx): try: size = total_upload_size(directory) with click.progressbar(length=size, label="Syncing") as bar: + def on_progress(increment): bar.update(increment) @@ -436,6 +443,7 @@ def on_progress(increment): except Exception as e: _print_unhandled_exception() + @cli.command() @click.pass_context def push(ctx): diff --git a/mergin/client.py b/mergin/client.py index 51ce95b..74f36d2 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -35,10 +35,16 @@ download_diffs_finalize, pull_project_async, pull_project_wait, - pull_project_finalize + pull_project_finalize, +) +from .client_push import ( + push_project_wait, + push_project_finalize, + push_project_async, + push_project_is_running, + ChangesHandler, + get_change_batch, ) -from .client_push import push_project_wait, push_project_finalize, push_project_async, push_project_is_running, \ - ChangesHandler, get_change_batch from .utils 
import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -904,7 +910,6 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - # while True: job = push_project_async(self, directory) if not job: return # there is nothing to push (or we only deleted some files) @@ -978,7 +983,6 @@ def sync_project_with_callback(self, project_dir, progress_callback=None, sleep_ last = now push_project_finalize(self.push_job) - def clone_project(self, source_project_path, cloned_project_name, cloned_project_namespace=None): """ Clone project on server. diff --git a/mergin/client_push.py b/mergin/client_push.py index f3662a3..ffeaf1a 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -164,6 +164,7 @@ def split(self) -> List[Dict[str, List[dict]]]: # TODO: apply limits; changes = self._limit_by_file_count(changes) return changes_list + def get_change_batch(mc, project_dir) -> Tuple[Optional[Dict[str, List[dict]]], bool]: """ Return the next changes dictionary and flag if there are more changes (to be uploaded in the next upload job) @@ -264,7 +265,7 @@ def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: upload_files = data["changes"]["added"] + data["changes"]["updated"] transaction_id = server_resp["transaction"] if upload_files else None - exclusive = server_resp.get("exclusive", True) + exclusive = server_resp.get("blocking", True) job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir, exclusive) if not upload_files: @@ -366,7 +367,9 @@ def push_project_finalize(job): if with_upload_of_files: try: - job.mp.log.info(f"Finishing {'exclusive' if job.exclusive else 'non-exclusive'} transaction {job.transaction_id}") + job.mp.log.info( + f"Finishing {'exclusive' if job.exclusive else 'non-exclusive'} transaction {job.transaction_id}" + ) resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) 
job.server_resp = json.load(resp) except ClientError as err: @@ -445,9 +448,6 @@ def total_upload_size(directory) -> int: mp = MerginProject(directory) changes = mp.get_push_changes() files = changes.get("added", []) + changes.get("updated", []) - size = sum( - f.get("diff", {}).get("size", f.get("size", 0)) - for f in files - ) + size = sum(f.get("diff", {}).get("size", f.get("size", 0)) for f in files) mp.log.info(f"Upload size of all files is {size}") return size diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index ea9df38..fadb24b 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -26,7 +26,7 @@ TokenError, ServerType, ) -from ..client_push import push_next_change, push_project_cancel, ChangesHandler, _do_upload +from ..client_push import push_project_cancel, ChangesHandler from ..client_pull import ( download_project_async, download_project_wait, @@ -2899,10 +2899,10 @@ def test_mc_without_login(): with pytest.raises(ClientError, match="Authentication information is missing or invalid."): mc.workspaces_list() + def _sort_dict_of_files_by_path(d): - return { - k: sorted(v, key=lambda f: f["path"]) for k, v in d.items() - } + return {k: sorted(v, key=lambda f: f["path"]) for k, v in d.items()} + def test_changes_handler(mc): """ @@ -2937,7 +2937,8 @@ def test_changes_handler(mc): # import sqlite3 # import os -def inflate_gpkg(path, blob_size_bytes=1024*1024, rows=50): + +def inflate_gpkg(path, blob_size_bytes=1024 * 1024, rows=50): """ Append a table named 'inflate' to the GeoPackage at `path`, then insert `rows` rows each containing a BLOB of size `blob_size_bytes`. 
@@ -2949,14 +2950,16 @@ def inflate_gpkg(path, blob_size_bytes=1024*1024, rows=50): con = sqlite3.connect(path) cur = con.cursor() # 1) create the dummy table if it doesn't already exist - cur.execute(""" + cur.execute( + """ CREATE TABLE IF NOT EXISTS inflate ( id INTEGER PRIMARY KEY, data BLOB NOT NULL ); - """) + """ + ) # 2) prepare one blob of the given size - dummy_blob = sqlite3.Binary(b'\x00' * blob_size_bytes) + dummy_blob = sqlite3.Binary(b"\x00" * blob_size_bytes) # 3) insert a bunch of rows for _ in range(rows): cur.execute("INSERT INTO inflate (data) VALUES (?);", (dummy_blob,)) @@ -2964,33 +2967,34 @@ def inflate_gpkg(path, blob_size_bytes=1024*1024, rows=50): con.close() -# def _make_slow_upload(delay: float): -# """ -# Helper to mock up a slow upload -# """ -# def slow_upload(item, job): -# time.sleep(delay) # delay in seconds for each chunk upload -# return _do_upload(item, job) -# return slow_upload -# -# -# def _delayed_push(mc: MerginClient, directory: str, delay: float): -# """ -# Patches chunks upload during project push -# """ -# with patch("mergin.client_push._do_upload", side_effect=_make_slow_upload(delay)): -# return mc.push_project(directory) - +def create_dummy_photos(dir_path, count=20, size_kb=5000): + """Create `count` dummy JPG files in `dir_path` with ~`size_kb` each.""" + os.makedirs(dir_path, exist_ok=True) + for i in range(count): + filename = os.path.join(dir_path, f"photo_{i:03}.jpg") + with open(filename, "wb") as f: + f.write(os.urandom(size_kb * 1024)) # Random bytes to simulate real file files_to_push = [ - # ("base.gpkg", "inserted_1_A.gpkg", False), # both pushes are exclusive, the latter one is refused - ("inserted_1_A.gpkg", "test.txt", True), # the second push is non-exclusive - it is free to go - # ("test3.txt", "inserted_1_A_mod.gpkg", True), # the first push is non-exclusive - it does not block other pushes + ( + "base.gpkg", + "inserted_1_A.gpkg", + False, + "another_process", + ), # both pushes are exclusive, the 
latter one is refused + ( + "inserted_1_A.gpkg", + "test.txt", + False, + "version_conflict", + ), # small files pushed at the same time might result in version conflict due to race condition + ("inserted_1_A.gpkg", "many_photos", True, None), # the upload of many photos does not block the other upload ] -@pytest.mark.parametrize("file1,file2,success", files_to_push) -def test_exclusive_upload(mc, mc2, file1, file2, success): + +@pytest.mark.parametrize("file1,file2,success,fail_reason", files_to_push) +def test_exclusive_upload(mc, mc2, file1, file2, success, fail_reason): """ Test two clients pushing at the same time """ @@ -3006,58 +3010,33 @@ def test_exclusive_upload(mc, mc2, file1, file2, success): mc.add_project_collaborator(project_info["id"], API_USER2, ProjectRole.WRITER) mc2.download_project(project_full_name, project_dir2) - def push1(): - mc.push_project(project_dir1) + def sync1(): + mc.sync_project(project_dir1) - def push2(): - mc2.push_project(project_dir2) + def sync2(): + mc2.sync_project(project_dir2) - # with open(os.path.join(project_dir1, file1), "wb") as f: - # f.write(os.urandom(50 * 1024 * 1024)) # 50 MB shutil.copy(os.path.join(TEST_DATA_DIR, file1), project_dir1) - shutil.copy(os.path.join(TEST_DATA_DIR, file2), project_dir2) - big_gpkg = os.path.join(project_dir1, file1) - # this will add ~50 MB of zero‐bytes to the file - inflate_gpkg(big_gpkg, blob_size_bytes=1_000_000, rows=50) - - # first_upload_delay = 2 - # resp1 = _delayed_push(mc, project_dir1, first_upload_delay) - # resp2 = _delayed_push(mc, project_dir2, 0) - # if not success: - # resp1. 
- - # run both pushes concurrently - # with patch("mergin.client_push._do_upload", side_effect=_slow_upload): + if file2 == "many_photos": + create_dummy_photos(project_dir2) + else: + shutil.copy(os.path.join(TEST_DATA_DIR, file2), project_dir2) + with concurrent.futures.ThreadPoolExecutor() as executor: - future1 = executor.submit(push1) - future2 = executor.submit(push2) - # first_upload_delay = 2 - # future1 = executor.submit(_delayed_push, mc, project_dir1, first_upload_delay) - # future2 = executor.submit(_delayed_push, mc2, project_dir2, 0) - # time.sleep(first_upload_delay + 0.2) - # assert not future1.exception() + future1 = executor.submit(sync1) + future2 = executor.submit(sync2) exc2 = future2.exception() exc1 = future1.exception() - # assert not exc2 if not success: error = exc1 if exc1 else exc2 # one is uploads is lucky to pass the other was slow - assert (exc1 is None or exc2 is None) + assert exc1 is None or exc2 is None assert isinstance(error, ClientError) - assert error.detail == "Another process is running. Please try later." - - # assert type(exc1) is ClientError - # assert exc1.http_error == 400 - # assert exc1.detail == "Another process is running. Please try later." - else: - # assert not exc1 - assert not (exc1 or exc2) - # assert (exc1 is None and isinstance(exc2, ClientError) or (exc2 is None and isinstance(exc1, ClientError))) - # if not success: - # assert type(exc2) is ClientError - # assert exc2.http_error == 400 - # assert exc2.detail == "Another process is running. Please try later." - # else: - # assert not exc2 - - + if fail_reason == "another_process": + assert error.http_error == 400 + assert error.detail == "Another process is running. Please try later." 
+ elif fail_reason == "version_conflict": + assert error.http_error == 409 + assert error.detail == "There is already version with this name v1" + else: + assert not (exc1 or exc2) From df1a1c4da4740aa906c1f887921793ad04e1c8d0 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 09:21:06 +0200 Subject: [PATCH 28/31] cleanup --- mergin/client.py | 1 - mergin/merginproject.py | 1 - mergin/test/test_client.py | 2 -- 3 files changed, 4 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 74f36d2..6e64c8c 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -42,7 +42,6 @@ push_project_finalize, push_project_async, push_project_is_running, - ChangesHandler, get_change_batch, ) from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable diff --git a/mergin/merginproject.py b/mergin/merginproject.py index b423abe..4e56ad8 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -1,6 +1,5 @@ import json import logging -import pprint import math import os diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index fadb24b..9eef9a7 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -6,10 +6,8 @@ import tempfile import subprocess import shutil -import time from collections import defaultdict from datetime import datetime, timedelta, date -from unittest.mock import patch import pytest import pytz From bafd30f3c24d000d54a9ebad8d5324628dcde232 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 10:47:01 +0200 Subject: [PATCH 29/31] Get rid of renamed key in changes --- mergin/merginproject.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 4e56ad8..933aa40 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -323,7 +323,7 @@ def compare_file_sets(self, origin, current) -> Dict[str, List[dict]]: >>> origin = [{'checksum': 
'08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}] >>> current = [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}] >>> self.compare_file_sets(origin, current) - {"added": [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}], "removed": [[{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]], "renamed": [], "updated": []} + {"added": [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}], "removed": [[{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]], "updated": []} :param origin: origin set of files metadata :type origin: list[dict] :param current: current set of files metadata to be compared against origin @@ -346,7 +346,7 @@ def compare_file_sets(self, origin, current) -> Dict[str, List[dict]]: f["origin_checksum"] = origin_map[path]["checksum"] updated.append(f) - return {"renamed": [], "added": added, "removed": removed, "updated": updated} + return {"added": added, "removed": removed, "updated": updated} def get_pull_changes(self, server_files): """ From e886dfcc3a8dee532feeafaec3ef6f155924d7e1 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 12:14:27 +0200 Subject: [PATCH 30/31] Fix test and filter_changes() --- mergin/client_push.py | 59 +++++----- mergin/test/test_client.py | 229 ++++++++++++++++--------------------- 2 files changed, 127 insertions(+), 161 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index ffeaf1a..d4b4623 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -96,36 +96,15 @@ class 
ChangesHandler: - Generating upload-ready change groups for asynchronous job creation. """ - def __init__(self, client, mp): + def __init__(self, client, project_dir): self.client = client - self.mp = MerginProject(mp) + self.mp = MerginProject(project_dir) self._raw_changes = self.mp.get_push_changes() @staticmethod def is_blocking_file(file): return is_qgis_file(file["path"]) or is_versioned_file(file["path"]) - def _filter_changes(self, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: - """ - Filters the given changes dictionary based on the editor's enabled state. - - If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters. - - Args: - changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries. - - Returns: - dict[str, list[dict]]: The filtered changes dictionary. - """ - project_name = self.mp.project_full_name() - try: - project_info = self.client.project_info(project_name) - except Exception as e: - self.mp.log.error(f"Failed to get project info for project {project_name}: {e}") - if not is_editor_enabled(self.client, project_info): - return changes - return _apply_editor_filters(changes) - def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[dict]]]: """ Split raw filtered changes into two batches: @@ -159,12 +138,35 @@ def split(self) -> List[Dict[str, List[dict]]]: """ Applies all configured internal filters and returns a list of change ready to be uploaded. 
""" - changes = self._filter_changes(self._raw_changes) + project_name = self.mp.project_full_name() + try: + project_info = self.client.project_info(project_name) + except ClientError as e: + self.mp.log.error(f"Failed to get project info for project {project_name}: {e}") + raise + changes = self.filter_changes(self.client, project_info, self._raw_changes) changes_list = self._split_by_type(changes) # TODO: apply limits; changes = self._limit_by_file_count(changes) return changes_list +def filter_changes(mc, project_info, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: + """ + Filters the given changes dictionary based on the editor's enabled state. + + If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters. + + Args: + changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries. + + Returns: + dict[str, list[dict]]: The filtered changes dictionary. + """ + if is_editor_enabled(mc, project_info): + changes = _apply_editor_filters(changes) + return changes + + def get_change_batch(mc, project_dir) -> Tuple[Optional[Dict[str, List[dict]]], bool]: """ Return the next changes dictionary and flag if there are more changes (to be uploaded in the next upload job) @@ -176,13 +178,14 @@ def get_change_batch(mc, project_dir) -> Tuple[Optional[Dict[str, List[dict]]], return changes_list[0], non_empty_length > 1 -def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]: +def push_project_async(mc, directory, changes=None) -> Optional[UploadJob]: """ Starts push in background and returns pending upload job. Pushes all project changes unless change_batch is provided. When specific change is provided, initial version check is skipped (the pull has just been done). - :param change_batch: A dictionary of changes that was split to blocking and non-blocking. 
+    :param changes: The changes to upload are either (1) provided (and already split to blocking and non-blocking batches)
+        or (2) all local changes are retrieved to upload

    Pushing only non-blocking changes results in non-exclusive upload which server allows to be concurrent.
    """
@@ -197,7 +200,7 @@ def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]:
    mp.log.info(f"--- start push {project_path}")

    # if we have specific change to push we don't need version check
-    if not change_batch:
+    if not changes:
        try:
            project_info = mc.project_info(project_path)
        except ClientError as err:
@@ -222,8 +225,8 @@ def push_project_async(mc, directory, change_batch=None) -> Optional[UploadJob]:
                "There is a new version of the project on the server. Please update your local copy."
                + f"\n\nLocal version: {local_version}\nServer version: {server_version}"
            )
+    changes = filter_changes(mc, project_info, mp.get_push_changes())

-    changes = change_batch or mp.get_push_changes()
    mp.log.debug("push change:\n" + pprint.pformat(changes))

    tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-")
diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py
index 9eef9a7..497e17b 100644
--- a/mergin/test/test_client.py
+++ b/mergin/test/test_client.py
@@ -1,4 +1,3 @@
-import concurrent.futures
import json
import logging
import os
@@ -8,11 +7,11 @@ import shutil
from collections import defaultdict
from datetime import datetime, timedelta, date
-
import pytest
import pytz
import sqlite3
import glob
+import concurrent.futures

from ..
import InvalidProject from ..client import ( @@ -24,7 +23,7 @@ TokenError, ServerType, ) -from ..client_push import push_project_cancel, ChangesHandler +from ..client_push import push_project_cancel, ChangesHandler, push_project_async, filter_changes from ..client_pull import ( download_project_async, download_project_wait, @@ -88,7 +87,7 @@ def mcStorage(request): client_workspace_storage = client_workspace["storage"] def teardown(): - # back to original values... (2 projects, api allowed ...) + # back to original values... (1 project, api allowed ...) client.patch( f"/v1/tests/workspaces/{client_workspace_id}", {"limits_override": get_limit_overrides(client_workspace_storage)}, @@ -236,18 +235,18 @@ def test_create_remote_project_from_local(mc): assert source_mp.project_full_name() == f"{API_USER}/{test_project}" assert source_mp.project_name() == test_project assert source_mp.workspace_name() == API_USER - assert source_mp.version() == "v2" # data was split to two pushes - blocking and non-blocking + assert source_mp.version() == "v1" # check basic metadata about created project project_info = mc.project_info(project) - assert project_info["version"] == "v2" + assert project_info["version"] == "v1" assert project_info["name"] == test_project assert project_info["namespace"] == API_USER assert project_info["id"] == source_mp.project_id() # check project metadata retrieval by id project_info = mc.project_info(source_mp.project_id()) - assert project_info["version"] == "v2" + assert project_info["version"] == "v1" assert project_info["name"] == test_project assert project_info["namespace"] == API_USER assert project_info["id"] == source_mp.project_id() @@ -288,7 +287,7 @@ def test_push_pull_changes(mc): assert mp2.project_full_name() == f"{API_USER}/{test_project}" assert mp2.project_name() == test_project assert mp2.workspace_name() == API_USER - assert mp2.version() == "v2" # two pushes - blocking and non-blocking + assert mp2.version() == "v1" # test push 
changes (add, remove, rename, update) f_added = "new.txt" @@ -307,7 +306,7 @@ def test_push_pull_changes(mc): # check changes before applied pull_changes, push_changes, _ = mc.project_status(project_dir) - assert not any(len(v) for v in pull_changes.values()) + assert not sum(len(v) for v in pull_changes.values()) assert next((f for f in push_changes["added"] if f["path"] == f_added), None) assert next((f for f in push_changes["removed"] if f["path"] == f_removed), None) assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) @@ -320,10 +319,10 @@ def test_push_pull_changes(mc): mp = MerginProject(project_dir) assert mp.project_full_name() == f"{API_USER}/{test_project}" - assert mp.version() == "v4" + assert mp.version() == "v2" project_info = mc.project_info(project) - assert project_info["version"] == "v4" + assert project_info["version"] == "v2" assert not next((f for f in project_info["files"] if f["path"] == f_removed), None) assert not next((f for f in project_info["files"] if f["path"] == f_renamed), None) assert next((f for f in project_info["files"] if f["path"] == "renamed.txt"), None) @@ -332,7 +331,7 @@ def test_push_pull_changes(mc): assert generate_checksum(os.path.join(project_dir, f_updated)) == f_remote_checksum assert project_info["id"] == mp.project_id() assert len(project_info["files"]) == len(mp.inspect_files()) - project_version = mc.project_version_info(project_info["id"], "v3") + project_version = mc.project_version_info(project_info["id"], "v2") updated_file = [f for f in project_version["changes"]["updated"] if f["path"] == f_updated][0] assert "origin_checksum" not in updated_file # internal client info @@ -358,9 +357,9 @@ def test_push_pull_changes(mc): assert not os.path.exists(os.path.join(project_dir_2, f_removed)) assert not os.path.exists(os.path.join(project_dir_2, f_renamed)) assert os.path.exists(os.path.join(project_dir_2, "renamed.txt")) - assert os.path.exists(os.path.join(project_dir_2, 
conflicted_copy_file_name(f_updated, API_USER, 2))) + assert os.path.exists(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1))) assert ( - generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 2))) + generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1))) == f_conflict_checksum ) assert generate_checksum(os.path.join(project_dir_2, f_updated)) == f_remote_checksum @@ -391,14 +390,13 @@ def test_cancel_push(mc): # check changes before applied pull_changes, push_changes, _ = mc.project_status(project_dir) - assert not any(len(v) for v in pull_changes.values()) + assert not sum(len(v) for v in pull_changes.values()) assert next((f for f in push_changes["added"] if f["path"] == f_added), None) assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None) # start pushing and then cancel the job - jobs = push_changes(mc, project_dir) - for job in jobs: - push_project_cancel(job) + job = push_project_async(mc, project_dir) + push_project_cancel(job) # if cancelled properly, we should be now able to do the push without any problem mc.push_project(project_dir) @@ -406,7 +404,7 @@ def test_cancel_push(mc): # download the project to a different directory and check the version and content mc.download_project(project, project_dir_2) mp = MerginProject(project_dir_2) - assert mp.version() == "v4" # 2 pushes when created + 2 when modified with add and update changes + assert mp.version() == "v2" assert os.path.exists(os.path.join(project_dir_2, f_added)) with open(os.path.join(project_dir_2, f_updated), "r") as f: assert f.read() == modification @@ -468,7 +466,7 @@ def test_sync_diff(mc): # check project after push project_info = mc.project_info(project) - assert project_info["version"] == "v4" # 2 pushes when project was created + assert project_info["version"] == "v3" assert project_info["id"] == mp.project_id() f_remote = next((f for f in 
project_info["files"] if f["path"] == f_updated), None) assert next((f for f in project_info["files"] if f["path"] == "renamed.gpkg"), None) @@ -566,9 +564,7 @@ def test_force_gpkg_update(mc): # check project after push project_info = mc.project_info(project) - assert ( - project_info["version"] == "v3" - ) # two pushes while creating the project with blocking and non-blocking change + assert project_info["version"] == "v2" f_remote = next((f for f in project_info["files"] if f["path"] == f_updated), None) assert "diff" not in f_remote @@ -966,19 +962,19 @@ def test_download_versions(mc): mc.push_project(project_dir) project_info = mc.project_info(project) - assert project_info["version"] == "v3" # 2 versions created with initial push of blocking and non-blocking files + assert project_info["version"] == "v2" - mc.download_project(project, project_dir_v1, "v2") + mc.download_project(project, project_dir_v1, "v1") assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) - assert not os.path.exists(os.path.join(project_dir_v2, f_added)) # added only in v3 + assert not os.path.exists(os.path.join(project_dir_v2, f_added)) # added only in v2 - mc.download_project(project, project_dir_v2, "v3") + mc.download_project(project, project_dir_v2, "v2") assert os.path.exists(os.path.join(project_dir_v2, f_added)) - assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) # added in v1 but still present in v3 + assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg")) # added in v1 but still present in v2 # try to download not-existing version with pytest.raises(ClientError): - mc.download_project(project, project_dir_v3, "v4") + mc.download_project(project, project_dir_v3, "v3") def test_paginated_project_list(mc): @@ -1072,13 +1068,11 @@ def create_versioned_project(mc, project_name, project_dir, updated_file, remove # create remote project shutil.copytree(TEST_DATA_DIR, project_dir) - mc.create_project_and_push( - project, project_dir - ) # creates two 
versions because push was split to blocking and non-blocking + mc.create_project_and_push(project, project_dir) mp = MerginProject(project_dir) - # create versions 3-6 + # create versions 2-4 changes = ( "inserted_1_A.gpkg", "inserted_1_A_mod.gpkg", @@ -1117,7 +1111,7 @@ def test_get_versions_with_file_changes(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False) project_info = mc.project_info(project) - assert project_info["version"] == "v5" + assert project_info["version"] == "v4" assert project_info["id"] == mp.project_id() file_history = mc.project_file_history_info(project, f_updated) @@ -1127,21 +1121,21 @@ def test_get_versions_with_file_changes(mc): project, f_updated, version_from="v1", - version_to="v6", + version_to="v5", file_history=file_history, ) - assert "Wrong version parameters: 1-6" in str(e.value) - assert "Available versions: [1, 3, 4, 5]" in str(e.value) + assert "Wrong version parameters: 1-5" in str(e.value) + assert "Available versions: [1, 2, 3, 4]" in str(e.value) mod_versions = get_versions_with_file_changes( mc, project, f_updated, - version_from="v3", - version_to="v5", + version_from="v2", + version_to="v4", file_history=file_history, ) - assert mod_versions == [f"v{i}" for i in range(3, 6)] + assert mod_versions == [f"v{i}" for i in range(2, 5)] def check_gpkg_same_content(mergin_project, gpkg_path_1, gpkg_path_2): @@ -1162,7 +1156,7 @@ def test_download_file(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated) project_info = mc.project_info(project) - assert project_info["version"] == "v6" + assert project_info["version"] == "v5" assert project_info["id"] == mp.project_id() # Versioned file should have the following content at versions 2-4 @@ -1174,14 +1168,14 @@ def test_download_file(mc): # Download the base file at versions 2-4 and check the changes f_downloaded = os.path.join(project_dir, f_updated) - for ver in range(3, 6): + for ver in range(2, 5): 
mc.download_file(project_dir, f_updated, f_downloaded, version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content assert check_gpkg_same_content(mp, f_downloaded, expected) # make sure there will be exception raised if a file doesn't exist in the version - with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"): - mc.download_file(project_dir, f_updated, f_downloaded, version="v6") + with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): + mc.download_file(project_dir, f_updated, f_downloaded, version="v5") def test_download_diffs(mc): @@ -1196,24 +1190,24 @@ def test_download_diffs(mc): mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False) project_info = mc.project_info(project) - assert project_info["version"] == "v5" + assert project_info["version"] == "v4" assert project_info["id"] == mp.project_id() - # Download diffs of updated file between versions 1 and 3 - mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v3") + # Download diffs of updated file between versions 1 and 2 + mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v2") assert os.path.exists(diff_file) assert mp.geodiff.has_changes(diff_file) assert mp.geodiff.changes_count(diff_file) == 1 - changes_file = diff_file + ".changes1-3" + changes_file = diff_file + ".changes1-2" mp.geodiff.list_changes_summary(diff_file, changes_file) with open(changes_file, "r") as f: changes = json.loads(f.read())["geodiff_summary"][0] assert changes["insert"] == 1 assert changes["update"] == 0 - # Download diffs of updated file between versions 3 and 5 - mc.get_file_diff(project_dir, f_updated, diff_file, "v3", "v5") - changes_file = diff_file + ".changes3-5" + # Download diffs of updated file between versions 2 and 4 + mc.get_file_diff(project_dir, 
f_updated, diff_file, "v2", "v4") + changes_file = diff_file + ".changes2-4" mp.geodiff.list_changes_summary(diff_file, changes_file) with open(changes_file, "r") as f: changes = json.loads(f.read())["geodiff_summary"][0] @@ -1221,14 +1215,14 @@ def test_download_diffs(mc): assert changes["update"] == 1 with pytest.raises(ClientError) as e: - mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v1") + mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v1") assert "Wrong version parameters" in str(e.value) assert "version_from needs to be smaller than version_to" in str(e.value) with pytest.raises(ClientError) as e: - mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v6") + mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v5") assert "Wrong version parameters" in str(e.value) - assert "Available versions: [1, 3, 4, 5]" in str(e.value) + assert "Available versions: [1, 2, 3, 4]" in str(e.value) def test_modify_project_permissions(mc): @@ -1995,13 +1989,13 @@ def test_project_versions_list(mc): project_dir = os.path.join(TMP_DIR, test_project) create_versioned_project(mc, test_project, project_dir, "base.gpkg") project_info = mc.project_info(project) - assert project_info["version"] == "v6" + assert project_info["version"] == "v5" # get all versions versions = mc.project_versions(project) - assert len(versions) == 6 + assert len(versions) == 5 assert versions[0]["name"] == "v1" - assert versions[-1]["name"] == "v6" + assert versions[-1]["name"] == "v5" # get first 3 versions versions = mc.project_versions(project, to="v3") @@ -2010,7 +2004,7 @@ def test_project_versions_list(mc): # get last 2 versions versions = mc.project_versions(project, since="v4") - assert len(versions) == 3 + assert len(versions) == 2 assert versions[0]["name"] == "v4" # get range v2-v4 @@ -2020,11 +2014,11 @@ def test_project_versions_list(mc): assert versions[-1]["name"] == "v4" versions_count = mc.project_versions_count(project) - assert versions_count == 6 + 
assert versions_count == 5 versions, _ = mc.paginated_project_versions(project, page=1, descending=True) - assert len(versions) == 6 - assert versions[0]["name"] == "v6" + assert len(versions) == 5 + assert versions[0]["name"] == "v5" assert versions[-1]["name"] == "v1" @@ -2035,10 +2029,10 @@ def test_report(mc): f_updated = "base.gpkg" mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False, overwrite=True) - # create report for between versions 3 and 5 + # create report for between versions 2 and 4 directory = mp.dir - since = "v3" - to = "v5" + since = "v2" + to = "v4" proj_name = project.replace(os.path.sep, "-") report_file = os.path.join(TMP_DIR, "report", f"{proj_name}-{since}-{to}.csv") if os.path.exists(report_file): @@ -2065,7 +2059,7 @@ def test_report(mc): ) assert headers in content assert f"base.gpkg,simple,{API_USER}" in content - assert "v4,update,,,2" in content + assert "v3,update,,,2" in content # files not edited are not in reports assert "inserted_1_A.gpkg" not in content @@ -2074,7 +2068,7 @@ def test_report(mc): assert warnings # do report for v1 with added files and v5 with overwritten file - warnings = create_report(mc, directory, "v1", "v6", report_file) + warnings = create_report(mc, directory, "v1", "v5", report_file) assert warnings # rm local Mergin Maps project and try again @@ -2182,11 +2176,11 @@ def test_changesets_download(mc): mp = MerginProject(project_dir) os.makedirs(download_dir, exist_ok=True) - diff_file = os.path.join(download_dir, "base-v1-3.diff") - mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v3") + diff_file = os.path.join(download_dir, "base-v1-2.diff") + mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v2") assert os.path.exists(diff_file) assert mp.geodiff.has_changes(diff_file) - assert mp.geodiff.changes_count(diff_file) == 3 + assert mp.geodiff.changes_count(diff_file) == 1 diff_file = os.path.join(download_dir, "base-v2-3.diff") mc.get_file_diff(project_dir, 
test_gpkg, diff_file, "v2", "v3") @@ -2223,10 +2217,10 @@ def test_version_info(mc): shutil.copy(os.path.join(TEST_DATA_DIR, "inserted_1_A_mod.gpkg"), file_path) mc.push_project(project_dir) project_info = mc.project_info(project) - info = mc.project_version_info(project_info.get("id"), "v3") + info = mc.project_version_info(project_info.get("id"), "v2") assert info["namespace"] == API_USER assert info["project_name"] == test_project - assert info["name"] == "v3" + assert info["name"] == "v2" assert info["author"] == API_USER created = datetime.strptime(info["created"], "%Y-%m-%dT%H:%M:%SZ") assert created.date() == date.today() @@ -2352,8 +2346,8 @@ def test_reset_local_changes(mc: MerginClient): os.remove(mp.fpath("test.txt")) os.remove(mp.fpath("test_dir/test2.txt")) - # download version 3 and create MerginProject for it - mc.download_project(project, project_dir_2, version="v3") + # download version 2 and create MerginProject for it + mc.download_project(project, project_dir_2, version="v2") mp = MerginProject(project_dir_2) # make some changes @@ -2368,7 +2362,7 @@ def test_reset_local_changes(mc: MerginClient): assert len(push_changes["removed"]) == 2 assert len(push_changes["updated"]) == 0 - # reset back to original version we had - v3 + # reset back to original version we had - v2 mc.reset_local_changes(project_dir_2) # push changes after the reset - should be none @@ -2448,7 +2442,7 @@ def test_project_rename(mc: MerginClient): # validate project info project_info = mc.project_info(project_renamed) - assert project_info["version"] == "v2" # teo version created in initial push + assert project_info["version"] == "v1" assert project_info["name"] == test_project_renamed assert project_info["namespace"] == API_USER with pytest.raises(ClientError, match="The requested URL was not found on the server"): @@ -2489,7 +2483,7 @@ def test_download_files(mc: MerginClient): mp = create_versioned_project(mc, test_project, project_dir, f_updated) project_info = 
mc.project_info(project) - assert project_info["version"] == "v6" + assert project_info["version"] == "v5" assert project_info["id"] == mp.project_id() # Versioned file should have the following content at versions 2-4 @@ -2502,15 +2496,15 @@ def test_download_files(mc: MerginClient): downloaded_file = os.path.join(download_dir, f_updated) # if output_paths is specified look at that location - for ver in range(3, 6): + for ver in range(2, 5): mc.download_files(project_dir, [f_updated], [downloaded_file], version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content assert check_gpkg_same_content(mp, downloaded_file, expected) # if output_paths is not specified look in the mergin project folder - for ver in range(3, 6): + for ver in range(2, 5): mc.download_files(project_dir, [f_updated], version=f"v{ver}") - expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3]) # GeoPackage with expected content + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content assert check_gpkg_same_content(mp, mp.fpath(f_updated), expected) # download two files from v1 and check their content @@ -2521,7 +2515,7 @@ def test_download_files(mc: MerginClient): project_dir, [f_updated, file_2], [downloaded_file, downloaded_file_2], - version="v2", + version="v1", ) assert check_gpkg_same_content(mp, downloaded_file, os.path.join(TEST_DATA_DIR, f_updated)) @@ -2533,11 +2527,11 @@ def test_download_files(mc: MerginClient): assert content_exp == content # make sure there will be exception raised if a file doesn't exist in the version - with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"): - mc.download_files(project_dir, [f_updated], version="v6") + with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): + 
mc.download_files(project_dir, [f_updated], version="v5") - with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v4"): - mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v4") + with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v3"): + mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v3") def test_download_failure(mc): @@ -2594,16 +2588,6 @@ def test_editor(mc: MerginClient): project_info["role"] = EDITOR_ROLE_NAME assert is_editor_enabled(mc, project_info) is True - # unit test for editor methods - qgs_changeset = { - "added": [{"path": "/folder/project.new.Qgz"}], - "updated": [{"path": "/folder/project.updated.Qgs"}], - "removed": [{"path": "/folder/project.removed.qgs"}], - } - changes_handler = ChangesHandler(mc, project_info, qgs_changeset) - qgs_changeset = changes_handler._filter_changes(qgs_changeset) - assert sum(len(v) for v in qgs_changeset.values()) == 2 - def test_editor_push(mc: MerginClient, mc2: MerginClient): """Test push with editor""" @@ -2638,8 +2622,8 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient): assert any(file["path"] == txt_file_name for file in project_info.get("files")) is True assert any(file["path"] == gpkg_file_name for file in project_info.get("files")) is True pull_changes, push_changes, push_changes_summary = mc.project_status(project_dir) - assert not any(len(v) for v in pull_changes.values()) - assert not any(len(v) for v in push_changes.values()) + assert not sum(len(v) for v in pull_changes.values()) + assert not sum(len(v) for v in push_changes.values()) # editor is trying to push row to gpkg file -> it's possible shutil.copy( @@ -2690,7 +2674,6 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient): def test_error_push_already_named_project(mc: MerginClient): test_project = "test_push_already_existing" project_dir = os.path.join(TMP_DIR, test_project) - cleanup(mc, test_project, 
[project_dir]) with pytest.raises(ClientError) as e: mc.create_project_and_push(test_project, project_dir) @@ -2894,9 +2877,12 @@ def test_mc_without_login(): assert config["server_configured"] # without login should not be able to access workspaces - with pytest.raises(ClientError, match="Authentication information is missing or invalid."): + with pytest.raises(ClientError) as e: mc.workspaces_list() + assert e.value.http_error == 401 + assert e.value.detail == "Authentication information is missing or invalid." + def _sort_dict_of_files_by_path(d): return {k: sorted(v, key=lambda f: f["path"]) for k, v in d.items()} @@ -2918,7 +2904,7 @@ def test_changes_handler(mc): mp.write_metadata(project_dir, project_info) mixed_changes = mp.get_push_changes() - changes_handler = ChangesHandler(mc, project_info, mixed_changes) + changes_handler = ChangesHandler(mc, project_dir) split_changes = changes_handler._split_by_type(mixed_changes) assert len(split_changes) == 2 # all blocking files in the first dict and no blocking file in the second dict @@ -2931,38 +2917,15 @@ def test_changes_handler(mc): merged[k].extend(v) assert _sort_dict_of_files_by_path(merged) == _sort_dict_of_files_by_path(mixed_changes) - -# import sqlite3 -# import os - - -def inflate_gpkg(path, blob_size_bytes=1024 * 1024, rows=50): - """ - Append a table named 'inflate' to the GeoPackage at `path`, - then insert `rows` rows each containing a BLOB of size `blob_size_bytes`. - This will grow the file by roughly rows * blob_size_bytes. 
- """ - # make sure file exists - if not os.path.exists(path): - raise FileNotFoundError(path) - con = sqlite3.connect(path) - cur = con.cursor() - # 1) create the dummy table if it doesn't already exist - cur.execute( - """ - CREATE TABLE IF NOT EXISTS inflate ( - id INTEGER PRIMARY KEY, - data BLOB NOT NULL - ); - """ - ) - # 2) prepare one blob of the given size - dummy_blob = sqlite3.Binary(b"\x00" * blob_size_bytes) - # 3) insert a bunch of rows - for _ in range(rows): - cur.execute("INSERT INTO inflate (data) VALUES (?);", (dummy_blob,)) - con.commit() - con.close() + # test filter_changes + project_info = {"role": EDITOR_ROLE_NAME} + qgs_changeset = { + "added": [{"path": "/folder/project.new.Qgz"}], + "updated": [{"path": "/folder/project.updated.Qgs"}], + "removed": [{"path": "/folder/project.removed.qgs"}], + } + qgs_changeset = filter_changes(mc, project_info, qgs_changeset) + assert sum(len(v) for v in qgs_changeset.values()) == 2 def create_dummy_photos(dir_path, count=20, size_kb=5000): From 81e79096d7708e047e7c356dedcd1cec7b45a3b2 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 11 Jul 2025 13:16:03 +0200 Subject: [PATCH 31/31] Fix tests --- mergin/client.py | 4 +++- mergin/client_push.py | 2 +- mergin/test/test_client.py | 19 ++++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 6e64c8c..4d8b75d 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -934,7 +934,9 @@ def sync_project(self, project_dir): 1. Pull server version 2. Get local changes 3. Push first change batch - Repeat if there are more changes pending. + Repeat if there are more local changes. 
+ The batch pushing makes use of the server's ability to handle simultaneously exclusive upload (that blocks + other uploads) and non-exclusive upload (for adding assets) """ has_more_changes = True while has_more_changes: diff --git a/mergin/client_push.py b/mergin/client_push.py index d4b4623..2c9594d 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -144,7 +144,7 @@ def split(self) -> List[Dict[str, List[dict]]]: except ClientError as e: self.mp.log.error(f"Failed to get project info for project {project_name}: {e}") raise - changes = self.filter_changes(self.client, project_info, self._raw_changes) + changes = filter_changes(self.client, project_info, self._raw_changes) changes_list = self._split_by_type(changes) # TODO: apply limits; changes = self._limit_by_file_count(changes) return changes_list diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 497e17b..5d095e9 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -313,7 +313,8 @@ def test_push_pull_changes(mc): # renamed file will result in removed + added file assert next((f for f in push_changes["removed"] if f["path"] == f_renamed), None) assert next((f for f in push_changes["added"] if f["path"] == "renamed.txt"), None) - assert not pull_changes["renamed"] # not supported + assert not pull_changes.get("renamed") # not supported + assert not push_changes.get("renamed") # not supported mc.push_project(project_dir) @@ -2944,22 +2945,22 @@ def create_dummy_photos(dir_path, count=20, size_kb=5000): False, "another_process", ), # both pushes are exclusive, the latter one is refused - ( - "inserted_1_A.gpkg", - "test.txt", - False, - "version_conflict", - ), # small files pushed at the same time might result in version conflict due to race condition + # ( + # "inserted_1_A.gpkg", + # "test.txt", + # False, + # "version_conflict", + # ), # small files pushed at the same time might result in version conflict due to race condition ("inserted_1_A.gpkg",
"many_photos", True, None), # the upload of many photos does not block the other upload ] @pytest.mark.parametrize("file1,file2,success,fail_reason", files_to_push) -def test_exclusive_upload(mc, mc2, file1, file2, success, fail_reason): +def test_sync_project(mc, mc2, file1, file2, success, fail_reason): """ Test two clients pushing at the same time """ - test_project_name = "test_exclusive_upload" + test_project_name = "test_sync_project" project_dir1 = os.path.join(TMP_DIR, test_project_name + "_1") project_dir2 = os.path.join(TMP_DIR, test_project_name + "_2")