Skip to content

Commit 2b33452

Browse files
varmar05wonder-sk
authored andcommitted
code refactor
- use cache folder in MerginProject - use function instead of class to create report - create qgsDistanceArea only once per report
1 parent 1ee9b92 commit 2b33452

File tree

3 files changed

+111
-74
lines changed

3 files changed

+111
-74
lines changed

mergin/client_pull.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -701,8 +701,6 @@ def download_diffs_async(mc, project_directory, file_path, versions):
701701
mp.log.info("--- downloading diffs aborted")
702702
raise
703703

704-
download_dir = os.path.join(mp.meta_dir, ".cache")
705-
os.makedirs(download_dir, exist_ok=True)
706704
fetch_files = []
707705

708706
for version in versions:
@@ -718,8 +716,8 @@ def download_diffs_async(mc, project_directory, file_path, versions):
718716
download_list = [] # list of all items to be downloaded
719717
total_size = 0
720718
for file in fetch_files:
721-
items = _download_items(file, download_dir, diff_only=True)
722-
dest_file_path = os.path.normpath(os.path.join(download_dir, file["version"] + "-" + file['diff']['path']))
719+
items = _download_items(file, mp.cache_dir, diff_only=True)
720+
dest_file_path = mp.fpath_cache(file['diff']['path'], version=file["version"])
723721
if os.path.exists(dest_file_path):
724722
continue
725723
files_to_merge.append(FileToMerge(dest_file_path, items))
@@ -729,7 +727,7 @@ def download_diffs_async(mc, project_directory, file_path, versions):
729727

730728
mp.log.info(f"will download {len(download_list)} chunks, total size {total_size}")
731729

732-
job = PullJob(project_path, None, total_size, None, files_to_merge, download_list, download_dir, mp,
730+
job = PullJob(project_path, None, total_size, None, files_to_merge, download_list, mp.cache_dir, mp,
733731
server_info, {}, mc.username())
734732

735733
# start download

mergin/merginproject.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ def __init__(self, directory):
5252
# location for files from unfinished pull
5353
self.unfinished_pull_dir = os.path.join(self.meta_dir, 'unfinished_pull')
5454

55+
self.cache_dir = os.path.join(self.meta_dir, '.cache')
56+
if not os.path.exists(self.cache_dir):
57+
os.mkdir(self.cache_dir)
58+
5559
self.setup_logging(directory)
5660

5761
# make sure we can load correct pygeodiff
@@ -119,6 +123,14 @@ def fpath_unfinished_pull(self, file):
119123
""" Helper function to get absolute path of file in unfinished_pull dir. """
120124
return self.fpath(file, self.unfinished_pull_dir)
121125

126+
def fpath_cache(self, file, version=None):
127+
""" Helper function to get absolute path of file in cache dir.
128+
It can be either in root cache directory (.mergin/.cache/) or in some version's subfolder
129+
"""
130+
if version:
131+
return self.fpath(file, os.path.join(self.cache_dir, version))
132+
return self.fpath(file, self.cache_dir)
133+
122134
@property
123135
def metadata(self):
124136
if not os.path.exists(self.fpath_meta('mergin.json')):

mergin/report.py

Lines changed: 96 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
import csv
22
import json
33
import os
4-
import tempfile
54
from collections import defaultdict
65
from datetime import datetime
76
from itertools import groupby
87

9-
from mergin import ClientError
10-
from mergin.merginproject import MerginProject, pygeodiff
11-
from mergin.utils import int_version
8+
from . import ClientError
9+
from .merginproject import MerginProject, pygeodiff
10+
from .utils import int_version
1211

1312
try:
14-
from qgis.core import QgsGeometry, QgsDistanceArea, QgsCoordinateReferenceSystem, QgsCoordinateTransformContext, QgsWkbTypes
13+
from qgis.core import QgsGeometry, QgsDistanceArea, QgsCoordinateReferenceSystem, QgsCoordinateTransformContext, \
14+
QgsWkbTypes
15+
1516
has_qgis = True
1617
except ImportError:
1718
has_qgis = False
@@ -58,12 +59,14 @@ def qgs_geom_from_wkb(geom):
5859

5960
class ChangesetReportEntry:
6061
""" Derivative of geodiff ChangesetEntry suitable for further processing/reporting """
61-
def __init__(self, changeset_entry, geom_idx, geom):
62+
63+
def __init__(self, changeset_entry, geom_idx, geom, qgs_distance_area=None):
6264
self.table = changeset_entry.table.name
6365
self.geom_type = geom["type"]
6466
self.crs = "EPSG:" + geom["srs_id"]
6567
self.length = None
6668
self.area = None
69+
self.dim = 0
6770

6871
if changeset_entry.operation == changeset_entry.OP_DELETE:
6972
self.operation = "delete"
@@ -75,14 +78,12 @@ def __init__(self, changeset_entry, geom_idx, geom):
7578
self.operation = "unknown"
7679

7780
# only calculate geom properties when qgis api is available
78-
if not has_qgis:
81+
if not qgs_distance_area:
7982
return
8083

81-
d = QgsDistanceArea()
82-
d.setEllipsoid('WGS84')
8384
crs = QgsCoordinateReferenceSystem()
8485
crs.createFromString(self.crs)
85-
d.setSourceCrs(crs, QgsCoordinateTransformContext())
86+
qgs_distance_area.setSourceCrs(crs, QgsCoordinateTransformContext())
8687

8788
if hasattr(changeset_entry, "old_values"):
8889
old_wkb = changeset_entry.old_values[geom_idx]
@@ -111,65 +112,94 @@ def __init__(self, changeset_entry, geom_idx, geom):
111112
elif self.operation == "insert":
112113
qgs_geom = qgs_geom_from_wkb(new_wkb)
113114

114-
dim = QgsWkbTypes.wkbDimensions(qgs_geom.wkbType())
115-
if dim == 1:
116-
self.length = d.measureLength(qgs_geom)
115+
self.dim = QgsWkbTypes.wkbDimensions(qgs_geom.wkbType())
116+
if self.dim == 1:
117+
self.length = qgs_distance_area.measureLength(qgs_geom)
117118
if updated_qgs_geom:
118-
self.length = d.measureLength(updated_qgs_geom) - self.length
119-
elif dim == 2:
120-
self.length = d.measurePerimeter(qgs_geom)
121-
self.area = d.measureArea(qgs_geom)
119+
self.length = qgs_distance_area.measureLength(updated_qgs_geom) - self.length
120+
elif self.dim == 2:
121+
self.length = qgs_distance_area.measurePerimeter(qgs_geom)
122+
self.area = qgs_distance_area.measureArea(qgs_geom)
122123
if updated_qgs_geom:
123-
self.length = d.measurePerimeter(updated_qgs_geom) - self.length
124-
self.area = d.measureArea(updated_qgs_geom) - self.area
125-
126-
127-
class ChangesetReport:
128-
""" Report (aggregate) from geopackage changeset (diff file) """
129-
def __init__(self, changeset_reader, schema):
130-
self.cr = changeset_reader
131-
self.schema = schema
132-
self.entries = []
133-
# let's iterate through reader and populate entries
134-
for entry in self.cr:
135-
schema_table = next((t for t in schema if t["table"] == entry.table.name), None)
136-
# get geometry index in both gpkg schema and diffs values
137-
geom_idx = next((index for (index, col) in enumerate(schema_table["columns"]) if col["type"] == "geometry"),
138-
None)
139-
if geom_idx is None:
140-
continue
124+
self.length = qgs_distance_area.measurePerimeter(updated_qgs_geom) - self.length
125+
self.area = qgs_distance_area.measureArea(updated_qgs_geom) - self.area
126+
127+
128+
def changeset_report(changeset_reader, schema):
129+
""" Parse Geodiff changeset reader and create report from it.
130+
Aggregate individual entries based on common table, operation and geom type.
131+
If QGIS API is available, then lengths and areas of individual entries are summed.
132+
133+
:Example:
134+
135+
>>> geodiff.schema("sqlite", "", "/tmp/base.gpkg", "/tmp/base-schema.json")
136+
>>> with open("/tmp/base-schema.json", 'r') as sf:
137+
schema = json.load(sf).get("geodiff_schema")
138+
>>> cr = geodiff.read_changeset("/tmp/base.gpkg-diff")
139+
>>> changeset_report(cr, schema)
140+
[{"table": "Lines", "operation": "insert", "length": 1.234, "area": 0.0, "count": 3}]
141141
142-
geom_col = schema_table["columns"][geom_idx]["geometry"]
143-
report_entry = ChangesetReportEntry(entry, geom_idx, geom_col)
144-
self.entries.append(report_entry)
145-
146-
def report(self):
147-
""" Aggregate entries by table and operation type and calculate quantity """
148-
records = []
149-
tables = defaultdict(list)
150-
151-
for obj in self.entries:
152-
tables[obj.table].append(obj)
153-
154-
for table, entries in tables.items():
155-
items = groupby(entries, lambda i: (i.operation, i.geom_type))
156-
for k, v in items:
157-
values = list(v)
158-
area = sum([entry.area for entry in values if entry.area]) if has_qgis else None
159-
length = sum([entry.length for entry in values if entry.length]) if has_qgis else None
160-
records.append({
161-
"table": table,
162-
"operation": k[0],
163-
"length": length,
164-
"area": area,
165-
"count": len(values)
166-
})
167-
return records
142+
Args:
143+
changeset_reader (pygeodiff.ChangesetReader): changeset reader from geodiff changeset (diff file)
144+
schema: geopackage schema with list of tables (from full .gpkg file)
145+
Returns:
146+
list of dict of aggregated records
147+
"""
148+
entries = []
149+
records = []
150+
151+
if has_qgis:
152+
distance_area = QgsDistanceArea()
153+
distance_area.setEllipsoid('WGS84')
154+
else:
155+
distance_area = None
156+
# let's iterate through reader and populate entries
157+
for entry in changeset_reader:
158+
schema_table = next((t for t in schema if t["table"] == entry.table.name), None)
159+
# get geometry index in both gpkg schema and diffs values
160+
geom_idx = next((index for (index, col) in enumerate(schema_table["columns"]) if col["type"] == "geometry"),
161+
None)
162+
if geom_idx is None:
163+
continue
164+
165+
geom_col = schema_table["columns"][geom_idx]["geometry"]
166+
report_entry = ChangesetReportEntry(entry, geom_idx, geom_col, distance_area)
167+
entries.append(report_entry)
168+
169+
# create a map of entries grouped by tables within single .gpkg file
170+
tables = defaultdict(list)
171+
for obj in entries:
172+
tables[obj.table].append(obj)
173+
174+
# iterate through all tables and aggregate changes by operation type (e.g. insert) and geometry type (e.g point)
175+
# for example 3 point features inserted in 'Points' table would be single row with count 3
176+
for table, entries in tables.items():
177+
items = groupby(entries, lambda i: (i.operation, i.geom_type))
178+
for k, v in items:
179+
values = list(v)
180+
# sum lenghts and areas only if it makes sense (valid dimension)
181+
area = sum([entry.area for entry in values if entry.area]) if values[0].dim == 2 else None
182+
length = sum([entry.length for entry in values if entry.length]) if values[0].dim > 0 else None
183+
records.append({
184+
"table": table,
185+
"operation": k[0],
186+
"length": length,
187+
"area": area,
188+
"count": len(values)
189+
})
190+
return records
168191

169192

170193
def create_report(mc, directory, since, to, out_file):
171194
""" Creates report from geodiff changesets for a range of project versions in CSV format.
172195
196+
Report is created for all .gpkg files and all tables within where updates were done using Geodiff lib.
197+
Changeset contains operation (insert/update/delete) and geometry properties like length/perimeter and area.
198+
Each row is an aggregate of the features modified by the same operation and of the same geometry type and contains
199+
these values: "file", "table", "author", "date", "time", "version", "operation", "length", "area", "count"
200+
201+
No filtering and grouping is done here, this is left for third-party office software to use pivot table functionality.
202+
173203
Args:
174204
mc (MerginClient): MerginClient instance.
175205
directory (str): local project directory (must already exist).
@@ -209,12 +239,12 @@ def create_report(mc, directory, since, to, out_file):
209239
mc.download_file_diffs(directory, f["path"], history_keys)
210240

211241
# download full gpkg in "to" version to analyze its schema to determine which col is geometry
212-
full_gpkg = os.path.join(mp.meta_dir, ".cache", f["path"])
242+
full_gpkg = mp.fpath_cache(f["path"], version=to)
213243
if not os.path.exists(full_gpkg):
214244
mc.download_file(directory, f["path"], full_gpkg, to)
215245

216246
# get gpkg schema
217-
schema_file = full_gpkg + '-schema' # geodiff writes schema into a file
247+
schema_file = full_gpkg + '-schema.json' # geodiff writes schema into a file
218248
if not os.path.exists(schema_file):
219249
mp.geodiff.schema("sqlite", "", full_gpkg, schema_file)
220250
with open(schema_file, 'r') as sf:
@@ -226,16 +256,13 @@ def create_report(mc, directory, since, to, out_file):
226256
if f['history'][version]["change"] == "updated":
227257
warnings.append(f"Missing diff: {f['path']} was overwritten in {version} - broken diff history")
228258
else:
229-
warnings.append(f"Missin diff: {f['path']} was {f['history'][version]['change']} in {version}")
259+
warnings.append(f"Missing diff: {f['path']} was {f['history'][version]['change']} in {version}")
230260
continue
231261

232-
v_diff_file = os.path.join(mp.meta_dir, '.cache',
233-
version + "-" + f['history'][version]['diff']['path'])
234-
262+
v_diff_file = mp.fpath_cache(f['history'][version]['diff']['path'], version=version)
235263
version_data = versions_map[version]
236264
cr = mp.geodiff.read_changeset(v_diff_file)
237-
rep = ChangesetReport(cr, schema)
238-
report = rep.report()
265+
report = changeset_report(cr, schema)
239266
# append version info to changeset info
240267
dt = datetime.fromisoformat(version_data["created"].rstrip("Z"))
241268
version_fields = {

0 commit comments

Comments
 (0)