Skip to content

Commit 2943893

Browse files
authored
Merge pull request #43 from lutraconsulting/checksum_issue
fix checksum issue
2 parents c933b59 + 9d00f58 commit 2943893

File tree

7 files changed

+82
-27
lines changed

7 files changed

+82
-27
lines changed

mergin/client.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,22 @@ def is_versioned_file(self, file):
134134
f_extension = os.path.splitext(file)[1]
135135
return f_extension in diff_extensions
136136

137+
def is_gpkg_open(self, path):
138+
"""
139+
Check whether geopackage file is open (and wal file exists)
140+
141+
:param path: absolute path of file on disk
142+
:type path: str
143+
:returns: whether file is open
144+
:rtype: bool
145+
"""
146+
f_extension = os.path.splitext(path)[1]
147+
if f_extension != '.gpkg':
148+
return False
149+
if os.path.exists(f'{path}-wal'):
150+
return True
151+
return False
152+
137153
def ignore_file(self, file):
138154
"""
139155
Helper function for blacklisting certain types of files.
@@ -165,6 +181,7 @@ def inspect_files(self):
165181
for file in files:
166182
if self.ignore_file(file):
167183
continue
184+
168185
abs_path = os.path.abspath(os.path.join(root, file))
169186
rel_path = os.path.relpath(abs_path, start=self.dir)
170187
proj_path = '/'.join(rel_path.split(os.path.sep)) # we need posix path
@@ -222,7 +239,8 @@ def compare_file_sets(self, origin, current):
222239
path = f["path"]
223240
if path not in origin_map:
224241
continue
225-
if f["checksum"] == origin_map[path]["checksum"]:
242+
# with open WAL files we don't know yet, better to mark file as updated
243+
if not self.is_gpkg_open(self.fpath(path)) and f["checksum"] == origin_map[path]["checksum"]:
226244
continue
227245
f["origin_checksum"] = origin_map[path]["checksum"]
228246
updated.append(f)
@@ -298,7 +316,12 @@ def get_push_changes(self):
298316
:rtype: dict
299317
"""
300318
changes = self.compare_file_sets(self.metadata['files'], self.inspect_files())
319+
# do checkpoint to push changes from wal file to gpkg
301320
for file in changes['added'] + changes['updated']:
321+
size, checksum = do_sqlite_checkpoint(self.fpath(file["path"]))
322+
if size and checksum:
323+
file["size"] = size
324+
file["checksum"] = checksum
302325
file['chunks'] = [str(uuid.uuid4()) for i in range(math.ceil(file["size"] / UPLOAD_CHUNK_SIZE))]
303326

304327
if not self.geodiff:
@@ -311,8 +334,9 @@ def get_push_changes(self):
311334
if not self.is_versioned_file(path):
312335
continue
313336

337+
# we use geodiff to check if we can push only diff files
314338
current_file = self.fpath(path)
315-
origin_file = self.fpath(path, self.meta_dir)
339+
origin_file = self.fpath_meta(path)
316340
diff_id = str(uuid.uuid4())
317341
diff_name = path + '-diff-' + diff_id
318342
diff_file = self.fpath_meta(diff_name)
@@ -332,7 +356,8 @@ def get_push_changes(self):
332356
else:
333357
not_updated.append(file)
334358
except (pygeodiff.GeoDiffLibError, pygeodiff.GeoDiffLibConflictError) as e:
335-
pass # we do force update
359+
# changes from wal file already committed
360+
pass
336361

337362
changes['updated'] = [f for f in changes['updated'] if f not in not_updated]
338363
return changes
@@ -477,15 +502,16 @@ def apply_push_changes(self, changes):
477502
elif k == 'added':
478503
shutil.copy(self.fpath(path), basefile)
479504
elif k == 'updated':
480-
# in case for geopackage cannot be created diff
505+
# in case for geopackage cannot be created diff (e.g. forced update with committed changes from wal file)
481506
if "diff" not in item:
482-
continue
483-
# better to apply diff to previous basefile to avoid issues with geodiff tmp files
484-
changeset = self.fpath_meta(item['diff']['path'])
485-
patch_error = self.apply_diffs(basefile, [changeset])
486-
if patch_error:
487-
# in case of local sync issues it is safier to remove basefile, next time it will be downloaded from server
488-
os.remove(basefile)
507+
shutil.copy(self.fpath(path), basefile)
508+
else:
509+
# better to apply diff to previous basefile to avoid issues with geodiff tmp files
510+
changeset = self.fpath_meta(item['diff']['path'])
511+
patch_error = self.apply_diffs(basefile, [changeset])
512+
if patch_error:
513+
# in case of local sync issues it is safier to remove basefile, next time it will be downloaded from server
514+
os.remove(basefile)
489515
else:
490516
pass
491517

@@ -944,10 +970,6 @@ def _push_changes(self, mp, data, parallel):
944970
with concurrent.futures.ThreadPoolExecutor() as executor:
945971
futures_map = {}
946972
for file in upload_files:
947-
# do checkpoint to push changes from wal file to gpkg if there is no diff
948-
if "diff" not in file and mp.is_versioned_file(file["path"]):
949-
do_sqlite_checkpoint(mp.fpath(file["path"]))
950-
file["checksum"] = generate_checksum(mp.fpath(file["path"]))
951973
file['location'] = mp.fpath_meta(file['diff']['path']) if 'diff' in file else mp.fpath(file['path'])
952974
future = executor.submit(self._upload_file, info["transaction"], file, parallel)
953975
futures_map[future] = file
@@ -960,10 +982,6 @@ def _push_changes(self, mp, data, parallel):
960982
raise ClientError("Timeout error: failed to upload {}".format(file))
961983
else:
962984
for file in upload_files:
963-
# do checkpoint to push changes from wal file to gpkg if there is no diff
964-
if "diff" not in file and mp.is_versioned_file(file["path"]):
965-
do_sqlite_checkpoint(mp.fpath(file["path"]))
966-
file["checksum"] = generate_checksum(mp.fpath(file["path"]))
967985
file['location'] = mp.fpath_meta(file['diff']['path']) if 'diff' in file else mp.fpath(file['path'])
968986
self._upload_file(info["transaction"], file, parallel)
969987

@@ -1085,6 +1103,7 @@ def _download_file(self, project_path, file, directory, parallel=True, diff_only
10851103
}
10861104
file_dir = os.path.dirname(os.path.normpath(os.path.join(directory, file['path'])))
10871105
basename = os.path.basename(file['diff']['path']) if diff_only else os.path.basename(file['path'])
1106+
expected_size = file['diff']['size'] if diff_only else file['size']
10881107

10891108
if file['size'] == 0:
10901109
os.makedirs(file_dir, exist_ok=True)
@@ -1125,7 +1144,7 @@ def download_file_part(part):
11251144
shutil.copyfileobj(chunk, final)
11261145
os.remove(file_part)
11271146

1128-
if os.path.getsize(os.path.join(file_dir, basename)) != file['size']:
1147+
if os.path.getsize(os.path.join(file_dir, basename)) != expected_size:
11291148
os.remove(os.path.join(file_dir, basename))
11301149
raise ClientError(f'Download of file {basename} failed. Please try it again.')
11311150

-32 KB
Binary file not shown.
-32.2 KB
Binary file not shown.
Binary file not shown.

mergin/test/test_client.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,6 @@ def test_push_pull_changes(mc, parallel):
136136
f_updated = 'test3.txt'
137137
with open(os.path.join(project_dir, f_updated), 'w') as f:
138138
f.write('Modified')
139-
src_files = os.listdir(CHANGED_SCHEMA_DIR)
140-
for file_name in src_files:
141-
full_file_name = os.path.join(CHANGED_SCHEMA_DIR, file_name)
142-
if os.path.isfile(full_file_name):
143-
shutil.copy(full_file_name, project_dir)
144139

145140
# check changes before applied
146141
pull_changes, push_changes, _ = mc.project_status(project_dir)
@@ -342,3 +337,33 @@ def test_list_of_push_changes(mc):
342337
mc.project_status(project_dir)
343338

344339

340+
def test_force_gpkg_update(mc):
341+
test_project = 'test_force_update'
342+
project = API_USER + '/' + test_project
343+
project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates
344+
345+
cleanup(mc, project, [project_dir])
346+
# create remote project
347+
shutil.copytree(TEST_DATA_DIR, project_dir)
348+
mc.create_project(test_project, project_dir)
349+
350+
# test push changes with force gpkg file upload:
351+
mp = MerginProject(project_dir)
352+
f_updated = 'base.gpkg'
353+
checksum = generate_checksum(mp.fpath(f_updated))
354+
355+
# base.gpkg updated to modified_schema (inserted new column)
356+
shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation (which will fail)
357+
shutil.copy(os.path.join(CHANGED_SCHEMA_DIR, 'modified_schema.gpkg'), mp.fpath(f_updated))
358+
shutil.copy(os.path.join(CHANGED_SCHEMA_DIR, 'modified_schema.gpkg-wal'), mp.fpath(f_updated + '-wal'))
359+
mc.push_project(project_dir)
360+
# by this point local file has been updated (changes committed from wal)
361+
updated_checksum = generate_checksum(mp.fpath(f_updated))
362+
assert checksum != updated_checksum
363+
364+
# check project after push
365+
project_info = mc.project_info(project)
366+
assert project_info['version'] == 'v2'
367+
f_remote = next((f for f in project_info['files'] if f['path'] == f_updated), None)
368+
assert f_remote['checksum'] == updated_checksum
369+
assert 'diff' not in f_remote

mergin/utils.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,23 @@ def int_version(version):
7171

7272
def do_sqlite_checkpoint(path):
7373
"""
74-
function to do checkpoint over the geopackage file which was not able to do diff file
74+
Function to do checkpoint over the geopackage file which was not able to do diff file.
75+
76+
:param path: file's absolute path on disk
77+
:type path: str
78+
:returns: new size and checksum of file after checkpoint
79+
:rtype: int, str
7580
"""
76-
if ".gpkg" in path and os.path.exists(f'{path}-wal') and os.path.exists(f'{path}-shm'):
81+
new_size = None
82+
new_checksum = None
83+
if ".gpkg" in path and os.path.exists(f'{path}-wal'):
7784
conn = sqlite3.connect(path)
7885
cursor = conn.cursor()
7986
cursor.execute("PRAGMA wal_checkpoint=FULL")
8087
cursor.execute("VACUUM")
8188
conn.commit()
8289
conn.close()
90+
new_size = os.path.getsize(path)
91+
new_checksum = generate_checksum(path)
92+
93+
return new_size, new_checksum

0 commit comments

Comments
 (0)