Skip to content

Commit 21ffda0

Browse files
authored
Merge pull request #45 from lutraconsulting/fix-sync-issue
Try to fix sync issue in python client (#119)
2 parents 2943893 + e9e16e0 commit 21ffda0

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

mergin/client.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,6 @@ def get_pull_changes(self, server_files):
271271
if not self.geodiff:
272272
return changes
273273

274-
size_limit = int(os.environ.get('DIFFS_LIMIT_SIZE', 1024 * 1024)) # with smaller values than limit download full file instead of diffs
275274
not_updated = []
276275
for file in changes['updated']:
277276
# for small geodiff files it does not make sense to download diff and then apply it (slow)
@@ -294,8 +293,7 @@ def get_pull_changes(self, server_files):
294293
break # we found force update in history, does not make sense to download diffs
295294

296295
if is_updated:
297-
if diffs and file['size'] > size_limit and diffs_size < file['size']/2:
298-
file['diffs'] = diffs
296+
file['diffs'] = diffs
299297
else:
300298
not_updated.append(file)
301299

@@ -455,9 +453,23 @@ def apply_pull_changes(self, changes, temp_dir):
455453
if os.path.exists(f'{dest}-shm'):
456454
os.remove(f'{dest}-shm')
457455
else:
458-
# just use server version of file to update both project file and its basefile
459-
shutil.copy(src, dest)
460-
shutil.copy(src, basefile)
456+
# The local file is not modified -> no rebase needed.
457+
# We just apply the diff between our copy and server to both the local copy and its basefile
458+
try:
459+
server_diff = self.fpath(f'{path}-server_diff', temp_dir) # diff between server file and local basefile
460+
# TODO: it could happen that basefile does not exist.
461+
# It was either never created (e.g. when pushing without geodiff)
462+
# or it was deleted by mistake(?) by the user. We should detect that
463+
# when starting pull and download it as well
464+
self.geodiff.create_changeset(basefile, src, server_diff)
465+
self.geodiff.apply_changeset(dest, server_diff)
466+
self.geodiff.apply_changeset(basefile, server_diff)
467+
except (pygeodiff.GeoDiffLibError, pygeodiff.GeoDiffLibConflictError):
468+
# something bad happened and we have failed to patch our local files - this should not happen if there
469+
# wasn't a schema change or something similar that geodiff can't handle.
470+
# FIXME: this is a last resort and may corrupt data! (we should warn user)
471+
shutil.copy(src, dest)
472+
shutil.copy(src, basefile)
461473
else:
462474
# backup if needed
463475
if path in modified and item['checksum'] != local_files_map[path]['checksum']:
@@ -1011,6 +1023,11 @@ def pull_project(self, directory, parallel=True):
10111023
if local_version == server_info["version"]:
10121024
return # Project is up to date
10131025

1026+
# we either download a versioned file using diffs (strongly preferred),
1027+
# but if we don't have history with diffs (e.g. uploaded without diffs)
1028+
# then we just download the whole file
1029+
_pulling_file_with_diffs = lambda f: 'diffs' in f and len(f['diffs']) != 0
1030+
10141031
temp_dir = mp.fpath_meta(f'fetch_{local_version}-{server_info["version"]}')
10151032
os.makedirs(temp_dir, exist_ok=True)
10161033
pull_changes = mp.get_pull_changes(server_info["files"])
@@ -1020,7 +1037,7 @@ def pull_project(self, directory, parallel=True):
10201037
fetch_files.append(f)
10211038
# extend fetch files download list with various version of diff files (if needed)
10221039
for f in pull_changes["updated"]:
1023-
if 'diffs' in f:
1040+
if _pulling_file_with_diffs(f):
10241041
for diff in f['diffs']:
10251042
diff_file = copy.deepcopy(f)
10261043
for k, v in f['history'].items():
@@ -1039,7 +1056,7 @@ def pull_project(self, directory, parallel=True):
10391056
with concurrent.futures.ThreadPoolExecutor() as executor:
10401057
futures_map = {}
10411058
for file in fetch_files:
1042-
diff_only = 'diffs' in file
1059+
diff_only = _pulling_file_with_diffs(f)
10431060
future = executor.submit(self._download_file, project_path, file, temp_dir, parallel, diff_only)
10441061
futures_map[future] = file
10451062

@@ -1052,13 +1069,13 @@ def pull_project(self, directory, parallel=True):
10521069
else:
10531070
for file in fetch_files:
10541071
# TODO check it does not fail, do some retry on ClientError
1055-
diff_only = 'diffs' in file
1072+
diff_only = _pulling_file_with_diffs(f)
10561073
self._download_file(project_path, file, temp_dir, parallel, diff_only)
10571074

10581075
# make sure we can update geodiff reference files (aka. basefiles) with diffs or
10591076
# download their full versions so we have them up-to-date for applying changes
10601077
for file in pull_changes['updated']:
1061-
if 'diffs' not in file:
1078+
if not _pulling_file_with_diffs(f):
10621079
continue
10631080
file['version'] = server_info['version']
10641081
basefile = mp.fpath_meta(file['path'])

mergin/test/test_client.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,18 +209,17 @@ def test_ignore_files(mc):
209209

210210
# (diffs size limit, push geodiff enabled, pull geodiff enabled)
211211
diff_test_scenarios = [
212-
(1024, True, True),
213-
(1024*1024, True, True),
214-
(1024, True, False),
215-
(1024, False, True),
216-
(1024, False, False),
212+
(True, True),
213+
(True, False),
214+
(False, True),
215+
(False, False),
217216
]
218217

219218

220-
@pytest.mark.parametrize("diffs_limit, push_geodiff_enabled, pull_geodiff_enabled", diff_test_scenarios)
221-
def test_sync_diff(mc, diffs_limit, push_geodiff_enabled, pull_geodiff_enabled):
219+
@pytest.mark.parametrize("push_geodiff_enabled, pull_geodiff_enabled", diff_test_scenarios)
220+
def test_sync_diff(mc, push_geodiff_enabled, pull_geodiff_enabled):
222221

223-
test_project = f'test_sync_diff_{diffs_limit}_{int(push_geodiff_enabled)}_{int(pull_geodiff_enabled)}'
222+
test_project = f'test_sync_diff_push{int(push_geodiff_enabled)}_pull{int(pull_geodiff_enabled)}'
224223
project = API_USER + '/' + test_project
225224
project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates
226225
project_dir_2 = os.path.join(TMP_DIR, test_project + '_2') # concurrent project dir with no changes
@@ -277,7 +276,6 @@ def test_sync_diff(mc, diffs_limit, push_geodiff_enabled, pull_geodiff_enabled):
277276
assert not os.path.exists(mp.fpath_meta('renamed.gpkg'))
278277

279278
# pull project in different directory
280-
os.environ['DIFFS_LIMIT_SIZE'] = str(diffs_limit)
281279
toggle_geodiff(pull_geodiff_enabled)
282280
mp2 = MerginProject(project_dir_2)
283281
mc.pull_project(project_dir_2)

0 commit comments

Comments
 (0)