@@ -271,7 +271,6 @@ def get_pull_changes(self, server_files):
271
271
if not self .geodiff :
272
272
return changes
273
273
274
- size_limit = int (os .environ .get ('DIFFS_LIMIT_SIZE' , 1024 * 1024 )) # with smaller values than limit download full file instead of diffs
275
274
not_updated = []
276
275
for file in changes ['updated' ]:
277
276
# for small geodiff files it does not make sense to download diff and then apply it (slow)
@@ -294,8 +293,7 @@ def get_pull_changes(self, server_files):
294
293
break # we found force update in history, does not make sense to download diffs
295
294
296
295
if is_updated :
297
- if diffs and file ['size' ] > size_limit and diffs_size < file ['size' ]/ 2 :
298
- file ['diffs' ] = diffs
296
+ file ['diffs' ] = diffs
299
297
else :
300
298
not_updated .append (file )
301
299
@@ -455,9 +453,23 @@ def apply_pull_changes(self, changes, temp_dir):
455
453
if os .path .exists (f'{ dest } -shm' ):
456
454
os .remove (f'{ dest } -shm' )
457
455
else :
458
- # just use server version of file to update both project file and its basefile
459
- shutil .copy (src , dest )
460
- shutil .copy (src , basefile )
456
+ # The local file is not modified -> no rebase needed.
457
+ # We just apply the diff between our copy and server to both the local copy and its basefile
458
+ try :
459
+ server_diff = self .fpath (f'{ path } -server_diff' , temp_dir ) # diff between server file and local basefile
460
+ # TODO: it could happen that basefile does not exist.
461
+ # It was either never created (e.g. when pushing without geodiff)
462
+ # or it was deleted by mistake(?) by the user. We should detect that
463
+ # when starting pull and download it as well
464
+ self .geodiff .create_changeset (basefile , src , server_diff )
465
+ self .geodiff .apply_changeset (dest , server_diff )
466
+ self .geodiff .apply_changeset (basefile , server_diff )
467
+ except (pygeodiff .GeoDiffLibError , pygeodiff .GeoDiffLibConflictError ):
468
+ # something bad happened and we have failed to patch our local files - this should not happen if there
469
+ # wasn't a schema change or something similar that geodiff can't handle.
470
+ # FIXME: this is a last resort and may corrupt data! (we should warn user)
471
+ shutil .copy (src , dest )
472
+ shutil .copy (src , basefile )
461
473
else :
462
474
# backup if needed
463
475
if path in modified and item ['checksum' ] != local_files_map [path ]['checksum' ]:
@@ -1011,6 +1023,11 @@ def pull_project(self, directory, parallel=True):
1011
1023
if local_version == server_info ["version" ]:
1012
1024
return # Project is up to date
1013
1025
1026
+ # we download a versioned file using diffs whenever possible (strongly preferred),
1027
+ # but if we don't have history with diffs (e.g. uploaded without diffs)
1028
+ # then we just download the whole file
1029
+ _pulling_file_with_diffs = lambda f : 'diffs' in f and len (f ['diffs' ]) != 0
1030
+
1014
1031
temp_dir = mp .fpath_meta (f'fetch_{ local_version } -{ server_info ["version" ]} ' )
1015
1032
os .makedirs (temp_dir , exist_ok = True )
1016
1033
pull_changes = mp .get_pull_changes (server_info ["files" ])
@@ -1020,7 +1037,7 @@ def pull_project(self, directory, parallel=True):
1020
1037
fetch_files .append (f )
1021
1038
# extend fetch files download list with various version of diff files (if needed)
1022
1039
for f in pull_changes ["updated" ]:
1023
- if 'diffs' in f :
1040
+ if _pulling_file_with_diffs ( f ) :
1024
1041
for diff in f ['diffs' ]:
1025
1042
diff_file = copy .deepcopy (f )
1026
1043
for k , v in f ['history' ].items ():
@@ -1039,7 +1056,7 @@ def pull_project(self, directory, parallel=True):
1039
1056
with concurrent .futures .ThreadPoolExecutor () as executor :
1040
1057
futures_map = {}
1041
1058
for file in fetch_files :
1042
- diff_only = 'diffs' in file
1059
+ diff_only = _pulling_file_with_diffs ( f )
1043
1060
future = executor .submit (self ._download_file , project_path , file , temp_dir , parallel , diff_only )
1044
1061
futures_map [future ] = file
1045
1062
@@ -1052,13 +1069,13 @@ def pull_project(self, directory, parallel=True):
1052
1069
else :
1053
1070
for file in fetch_files :
1054
1071
# TODO check it does not fail, do some retry on ClientError
1055
- diff_only = 'diffs' in file
1072
+ diff_only = _pulling_file_with_diffs ( f )
1056
1073
self ._download_file (project_path , file , temp_dir , parallel , diff_only )
1057
1074
1058
1075
# make sure we can update geodiff reference files (aka. basefiles) with diffs or
1059
1076
# download their full versions so we have them up-to-date for applying changes
1060
1077
for file in pull_changes ['updated' ]:
1061
- if 'diffs' not in file :
1078
+ if not _pulling_file_with_diffs ( f ) :
1062
1079
continue
1063
1080
file ['version' ] = server_info ['version' ]
1064
1081
basefile = mp .fpath_meta (file ['path' ])
0 commit comments