1
1
import csv
2
2
import json
3
3
import os
4
- import tempfile
5
4
from collections import defaultdict
6
5
from datetime import datetime
7
6
from itertools import groupby
8
7
9
- from mergin import ClientError
10
- from mergin .merginproject import MerginProject , pygeodiff
11
- from mergin .utils import int_version
8
+ from . import ClientError
9
+ from .merginproject import MerginProject , pygeodiff
10
+ from .utils import int_version
12
11
13
12
# QGIS API is optional: when it cannot be imported we only skip geometry
# measurements (length/area) and still produce the rest of the report.
try:
    from qgis.core import (
        QgsGeometry,
        QgsDistanceArea,
        QgsCoordinateReferenceSystem,
        QgsCoordinateTransformContext,
        QgsWkbTypes,
    )

    has_qgis = True
except ImportError:
    has_qgis = False
@@ -58,12 +59,14 @@ def qgs_geom_from_wkb(geom):
58
59
59
60
class ChangesetReportEntry :
60
61
""" Derivative of geodiff ChangesetEntry suitable for further processing/reporting """
61
- def __init__ (self , changeset_entry , geom_idx , geom ):
62
+
63
+ def __init__ (self , changeset_entry , geom_idx , geom , qgs_distance_area = None ):
62
64
self .table = changeset_entry .table .name
63
65
self .geom_type = geom ["type" ]
64
66
self .crs = "EPSG:" + geom ["srs_id" ]
65
67
self .length = None
66
68
self .area = None
69
+ self .dim = 0
67
70
68
71
if changeset_entry .operation == changeset_entry .OP_DELETE :
69
72
self .operation = "delete"
@@ -75,14 +78,12 @@ def __init__(self, changeset_entry, geom_idx, geom):
75
78
self .operation = "unknown"
76
79
77
80
# only calculate geom properties when qgis api is available
78
- if not has_qgis :
81
+ if not qgs_distance_area :
79
82
return
80
83
81
- d = QgsDistanceArea ()
82
- d .setEllipsoid ('WGS84' )
83
84
crs = QgsCoordinateReferenceSystem ()
84
85
crs .createFromString (self .crs )
85
- d .setSourceCrs (crs , QgsCoordinateTransformContext ())
86
+ qgs_distance_area .setSourceCrs (crs , QgsCoordinateTransformContext ())
86
87
87
88
if hasattr (changeset_entry , "old_values" ):
88
89
old_wkb = changeset_entry .old_values [geom_idx ]
@@ -111,65 +112,94 @@ def __init__(self, changeset_entry, geom_idx, geom):
111
112
elif self .operation == "insert" :
112
113
qgs_geom = qgs_geom_from_wkb (new_wkb )
113
114
114
- dim = QgsWkbTypes .wkbDimensions (qgs_geom .wkbType ())
115
- if dim == 1 :
116
- self .length = d .measureLength (qgs_geom )
115
+ self . dim = QgsWkbTypes .wkbDimensions (qgs_geom .wkbType ())
116
+ if self . dim == 1 :
117
+ self .length = qgs_distance_area .measureLength (qgs_geom )
117
118
if updated_qgs_geom :
118
- self .length = d .measureLength (updated_qgs_geom ) - self .length
119
- elif dim == 2 :
120
- self .length = d .measurePerimeter (qgs_geom )
121
- self .area = d .measureArea (qgs_geom )
119
+ self .length = qgs_distance_area .measureLength (updated_qgs_geom ) - self .length
120
+ elif self . dim == 2 :
121
+ self .length = qgs_distance_area .measurePerimeter (qgs_geom )
122
+ self .area = qgs_distance_area .measureArea (qgs_geom )
122
123
if updated_qgs_geom :
123
- self .length = d .measurePerimeter (updated_qgs_geom ) - self .length
124
- self .area = d .measureArea (updated_qgs_geom ) - self .area
125
-
126
-
127
- class ChangesetReport :
128
- """ Report (aggregate) from geopackage changeset (diff file) """
129
- def __init__ (self , changeset_reader , schema ):
130
- self .cr = changeset_reader
131
- self .schema = schema
132
- self .entries = []
133
- # let's iterate through reader and populate entries
134
- for entry in self .cr :
135
- schema_table = next ((t for t in schema if t ["table" ] == entry .table .name ), None )
136
- # get geometry index in both gpkg schema and diffs values
137
- geom_idx = next ((index for (index , col ) in enumerate (schema_table ["columns" ]) if col ["type" ] == "geometry" ),
138
- None )
139
- if geom_idx is None :
140
- continue
124
+ self .length = qgs_distance_area .measurePerimeter (updated_qgs_geom ) - self .length
125
+ self .area = qgs_distance_area .measureArea (updated_qgs_geom ) - self .area
126
+
127
+
128
def changeset_report(changeset_reader, schema):
    """Parse Geodiff changeset reader and create report from it.

    Aggregate individual entries based on common table, operation and geom type.
    If QGIS API is available, then lengths and areas of individual entries are summed.

    :Example:

    >>> geodiff.schema("sqlite", "", "/tmp/base.gpkg", "/tmp/base-schema.json")
    >>> with open("/tmp/base-schema.json", 'r') as sf:
            schema = json.load(sf).get("geodiff_schema")
    >>> cr = geodiff.read_changeset("/tmp/base.gpkg-diff")
    >>> changeset_report(cr, schema)
    [{"table": "Lines", "operation": "insert", "length": 1.234, "area": 0.0, "count": 3}]

    Args:
        changeset_reader (pygeodiff.ChangesetReader): changeset reader from geodiff changeset (diff file)
        schema: geopackage schema with list of tables (from full .gpkg file)
    Returns:
        list of dict of aggregated records
    """
    entries = []
    records = []

    # a single QgsDistanceArea is shared by all entries; each entry sets its own source CRS
    if has_qgis:
        distance_area = QgsDistanceArea()
        distance_area.setEllipsoid('WGS84')
    else:
        distance_area = None

    # let's iterate through reader and populate entries
    for entry in changeset_reader:
        schema_table = next((t for t in schema if t["table"] == entry.table.name), None)
        if schema_table is None:
            # table not present in the provided schema - nothing we can report for it
            continue
        # get geometry index in both gpkg schema and diffs values
        geom_idx = next(
            (index for (index, col) in enumerate(schema_table["columns"]) if col["type"] == "geometry"),
            None,
        )
        if geom_idx is None:
            continue

        geom_col = schema_table["columns"][geom_idx]["geometry"]
        entries.append(ChangesetReportEntry(entry, geom_idx, geom_col, distance_area))

    # create a map of entries grouped by tables within single .gpkg file
    tables = defaultdict(list)
    for report_entry in entries:
        tables[report_entry.table].append(report_entry)

    # iterate through all tables and aggregate changes by operation type (e.g. insert) and geometry type (e.g point)
    # for example 3 point features inserted in 'Points' table would be single row with count 3
    for table, table_entries in tables.items():
        group_key = lambda i: (i.operation, i.geom_type)
        # itertools.groupby only merges *consecutive* equal keys, so sort first
        # to get exactly one aggregate record per (operation, geom type) pair
        for (operation, _geom_type), group in groupby(sorted(table_entries, key=group_key), group_key):
            values = list(group)
            # sum lengths and areas only if it makes sense (valid dimension)
            area = sum(entry.area for entry in values if entry.area) if values[0].dim == 2 else None
            length = sum(entry.length for entry in values if entry.length) if values[0].dim > 0 else None
            records.append({
                "table": table,
                "operation": operation,
                "length": length,
                "area": area,
                "count": len(values),
            })
    return records
168
191
169
192
170
193
def create_report (mc , directory , since , to , out_file ):
171
194
""" Creates report from geodiff changesets for a range of project versions in CSV format.
172
195
196
+ Report is created for all .gpkg files and all tables within where updates were done using Geodiff lib.
197
+ Changeset contains operation (insert/update/delete) and geometry properties like length/perimeter and area.
198
+ Each row is an aggregate of the features modified by the same operation and of the same geometry type and contains
199
+ these values: "file", "table", "author", "date", "time", "version", "operation", "length", "area", "count"
200
+
201
+ No filtering and grouping is done here, this is left for third-party office software to use pivot table functionality.
202
+
173
203
Args:
174
204
mc (MerginClient): MerginClient instance.
175
205
directory (str): local project directory (must already exist).
@@ -209,12 +239,12 @@ def create_report(mc, directory, since, to, out_file):
209
239
mc .download_file_diffs (directory , f ["path" ], history_keys )
210
240
211
241
# download full gpkg in "to" version to analyze its schema to determine which col is geometry
212
- full_gpkg = os . path . join ( mp .meta_dir , ".cache" , f ["path" ])
242
+ full_gpkg = mp .fpath_cache ( f ["path" ], version = to )
213
243
if not os .path .exists (full_gpkg ):
214
244
mc .download_file (directory , f ["path" ], full_gpkg , to )
215
245
216
246
# get gpkg schema
217
- schema_file = full_gpkg + '-schema' # geodiff writes schema into a file
247
+ schema_file = full_gpkg + '-schema.json ' # geodiff writes schema into a file
218
248
if not os .path .exists (schema_file ):
219
249
mp .geodiff .schema ("sqlite" , "" , full_gpkg , schema_file )
220
250
with open (schema_file , 'r' ) as sf :
@@ -226,16 +256,13 @@ def create_report(mc, directory, since, to, out_file):
226
256
if f ['history' ][version ]["change" ] == "updated" :
227
257
warnings .append (f"Missing diff: { f ['path' ]} was overwritten in { version } - broken diff history" )
228
258
else :
229
- warnings .append (f"Missin diff: { f ['path' ]} was { f ['history' ][version ]['change' ]} in { version } " )
259
+ warnings .append (f"Missing diff: { f ['path' ]} was { f ['history' ][version ]['change' ]} in { version } " )
230
260
continue
231
261
232
- v_diff_file = os .path .join (mp .meta_dir , '.cache' ,
233
- version + "-" + f ['history' ][version ]['diff' ]['path' ])
234
-
262
+ v_diff_file = mp .fpath_cache (f ['history' ][version ]['diff' ]['path' ], version = version )
235
263
version_data = versions_map [version ]
236
264
cr = mp .geodiff .read_changeset (v_diff_file )
237
- rep = ChangesetReport (cr , schema )
238
- report = rep .report ()
265
+ report = changeset_report (cr , schema )
239
266
# append version info to changeset info
240
267
dt = datetime .fromisoformat (version_data ["created" ].rstrip ("Z" ))
241
268
version_fields = {
0 commit comments