From 6284b5734a20cb07aa1b46eaaf46a02eff97961a Mon Sep 17 00:00:00 2001 From: Christine Chambers Date: Fri, 14 Sep 2018 14:51:06 -0700 Subject: [PATCH 1/2] [refactor] Remove dependency to personal fork of supercluster from mapbox visualizations - Update dependency to reference the vanilla supercluster - Clean up backend api call for mapbox vizzes to ensure a boolean is sent to indicate whether the viz includes custom metric for clustering - Refactor of mapbox and its cluster overlay components to use vanilla supercluster and its recommeded way for handling clustering based on custom aggregations. - Allow reclustering within the initial bounds on render in mapbox visualizations (stay true to old behaviors). - Remove the median aggregation from available cluster label aggregators as there is no memory efficient way to implement this and it is unknown how often this feature is used - Updating doc to mention the backward incompatible change re median --- UPDATING.md | 12 ++-- superset/assets/package.json | 2 +- superset/assets/src/explore/controls.jsx | 1 - .../src/visualizations/MapBox/MapBox.jsx | 72 ++++++++++--------- .../MapBox/ScatterPlotGlowOverlay.jsx | 51 +++++++------ superset/assets/yarn.lock | 10 +-- superset/viz.py | 2 +- 7 files changed, 77 insertions(+), 73 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 4b182ae0c8774..eec22c23153f5 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -11,11 +11,13 @@ assists people when migrating to a new version. dashboards through an automated db migration script. We do recommend that you take a backup prior to this migration. +* Superset 0.28 deprecates the `median` cluster label aggregator for mapbox visualizations. This particular aggregation is not supported on mapbox visualizations going forward. + ## Superset 0.27.0 -* Superset 0.27 start to use nested layout for dashboard builder, which is not +* Superset 0.27 start to use nested layout for dashboard builder, which is not backward-compatible with earlier dashboard grid data. We provide migration script -to automatically convert dashboard grid to nested layout data. To be safe, please -take a database backup prior to this upgrade. It's the only way people could go +to automatically convert dashboard grid to nested layout data. To be safe, please +take a database backup prior to this upgrade. It's the only way people could go back to a previous state. @@ -38,7 +40,7 @@ The PRs bellow have more information around the breaking changes: * [4565](https://github.com/apache/incubator-superset/pull/4565) : we've changed the security model a bit where in the past you would have to define your authentication scheme by inheriting from Flask - App Builder's + App Builder's `from flask_appbuilder.security.sqla.manager import SecurityManager`, you now have to derive Superset's own derivative `superset.security.SupersetSecurityManager`. This @@ -46,7 +48,7 @@ The PRs bellow have more information around the breaking changes: permissions to another system as needed. For all implementation, you simply have to import and derive `SupersetSecurityManager` in place of the `SecurityManager` -* [4835](https://github.com/apache/incubator-superset/pull/4835) : +* [4835](https://github.com/apache/incubator-superset/pull/4835) : our `setup.py` now only pins versions where required, giving you more latitude in using versions of libraries as needed. We do now provide a `requirements.txt` with pinned versions if you want to run diff --git a/superset/assets/package.json b/superset/assets/package.json index d5571e08bb97d..f326ec6d6b08e 100644 --- a/superset/assets/package.json +++ b/superset/assets/package.json @@ -124,7 +124,7 @@ "shortid": "^2.2.6", "sprintf-js": "^1.1.1", "srcdoc-polyfill": "^1.0.0", - "supercluster": "https://github.com/georgeke/supercluster/tarball/ac3492737e7ce98e07af679623aad452373bbc40", + "supercluster": "^4.1.1", "underscore": "^1.8.3", "urijs": "^1.18.10", "viewport-mercator-project": "^5.0.0" diff --git a/superset/assets/src/explore/controls.jsx b/superset/assets/src/explore/controls.jsx index 9700454bd8118..f6f256279d558 100644 --- a/superset/assets/src/explore/controls.jsx +++ b/superset/assets/src/explore/controls.jsx @@ -1361,7 +1361,6 @@ export const controls = { 'mean', 'min', 'max', - 'median', 'stdev', 'var', ]), diff --git a/superset/assets/src/visualizations/MapBox/MapBox.jsx b/superset/assets/src/visualizations/MapBox/MapBox.jsx index 85cd1b1ad3ce6..65725dc98e4c1 100644 --- a/superset/assets/src/visualizations/MapBox/MapBox.jsx +++ b/superset/assets/src/visualizations/MapBox/MapBox.jsx @@ -48,10 +48,6 @@ class MapBox extends React.Component { height, }).fitBounds(bounds); const { latitude, longitude, zoom } = mercator; - // Compute the clusters based on the bounds. Again, this is only done once because - // we don't update the clusters as we pan/zoom in the current design. - const bbox = [bounds[0][0], bounds[0][1], bounds[1][0], bounds[1][1]]; - this.clusters = this.props.clusterer.getClusters(bbox, Math.round(zoom)); this.state = { viewport: { @@ -80,10 +76,19 @@ class MapBox extends React.Component { pointRadiusUnit, renderWhileDragging, rgb, + customMetric, + bounds, } = this.props; const { viewport } = this.state; const isDragging = viewport.isDragging === undefined ? false : viewport.isDragging; + + // Compute the clusters based on the original bounds and current zoom level. Note when zoom/pan + // to an area outside of the original bounds, no additional queries are made to the backend to + // retrieve additional data. + const bbox = [bounds[0][0], bounds[0][1], bounds[1][0], bounds[1][1]]; + const clusters = this.props.clusterer.getClusters(bbox, Math.round(viewport.zoom)); + return ( { const coordinates = location.get('geometry').get('coordinates'); return [coordinates.get(0), coordinates.get(1)]; @@ -119,30 +124,6 @@ class MapBox extends React.Component { MapBox.propTypes = propTypes; MapBox.defaultProps = defaultProps; -function createReducer(aggregatorName, customMetric) { - if (aggregatorName === 'sum' || !customMetric) { - return (a, b) => a + b; - } else if (aggregatorName === 'min') { - return Math.min; - } else if (aggregatorName === 'max') { - return Math.max; - } - return function (a, b) { - if (a instanceof Array) { - if (b instanceof Array) { - return a.concat(b); - } - a.push(b); - return a; - } - if (b instanceof Array) { - b.push(a); - return b; - } - return [a, b]; - }; -} - function mapbox(slice, payload, setControlValue) { const { formData, selector } = slice; const { @@ -170,18 +151,41 @@ function mapbox(slice, payload, setControlValue) { return; } - const clusterer = supercluster({ + const opts = { radius: clusteringRadius, maxZoom: DEFAULT_MAX_ZOOM, - metricKey: 'metric', - metricReducer: createReducer(aggregatorName, customMetric), - }); + }; + if (customMetric) { + opts.initial = () => ({ + sum: 0, + squaredSum: 0, + min: Infinity, + max: -Infinity, + }); + opts.map = prop => ({ + sum: prop.metric, + squaredSum: Math.pow(prop.metric, 2), + min: prop.metric, + max: prop.metric, + }); + opts.reduce = (accu, prop) => { + // Temporarily disable param-reassignment linting to work with supercluster's api + /* eslint-disable no-param-reassign */ + accu.sum += prop.sum; + accu.squaredSum += prop.squaredSum; + accu.min = Math.min(accu.min, prop.min); + accu.max = Math.max(accu.max, prop.max); + /* eslint-enable no-param-reassign */ + }; + } + const clusterer = supercluster(opts); clusterer.load(geoJSON.features); ReactDOM.render( { + const count = properties.get('point_count'); + if (!aggregation) { + return count; + } + if (aggregation === 'sum' || aggregation === 'min' || aggregation === 'max') { + return properties.get(aggregation); + } + const sum = properties.get('sum'); + const mean = sum / count; + if (aggregation === 'mean') { + return Math.round(100 * mean) / 100; + } + const squaredSum = properties.get('squaredSum'); + const variance = (squaredSum / count) - Math.pow(sum / count, 2); + if (aggregation === 'var') { + return Math.round(100 * variance) / 100; + } else if (aggregation === 'stdev') { + return Math.round(100 * Math.sqrt(variance)) / 100; + } + // fallback to point_count, this really shouldn't happen + return count; +}; + class ScatterPlotGlowOverlay extends React.Component { constructor(props) { super(props); @@ -90,34 +115,14 @@ class ScatterPlotGlowOverlay extends React.Component { const mercator = new ViewportMercator(props); const rgb = props.rgb; const clusterLabelMap = []; - let maxLabel = -1; props.locations.forEach(function (location, i) { if (location.get('properties').get('cluster')) { - let clusterLabel = location.get('properties').get('metric') - ? location.get('properties').get('metric') - : location.get('properties').get('point_count'); - - if (clusterLabel instanceof Immutable.List) { - clusterLabel = clusterLabel.toArray(); - if (props.aggregatorName === 'mean') { - clusterLabel = d3.mean(clusterLabel); - } else if (props.aggregatorName === 'median') { - clusterLabel = d3.median(clusterLabel); - } else if (props.aggregatorName === 'stdev') { - clusterLabel = d3.deviation(clusterLabel); - } else { - clusterLabel = d3.variance(clusterLabel); - } - } - - clusterLabel = isNumeric(clusterLabel) - ? d3.round(clusterLabel, 2) - : location.get('properties').get('point_count'); - maxLabel = Math.max(clusterLabel, maxLabel); - clusterLabelMap[i] = clusterLabel; + clusterLabelMap[i] = computeClusterLabel(location.get('properties'), + props.aggregation); } }, this); + const maxLabel = Math.max(...Object.values(clusterLabelMap)); ctx.save(); ctx.scale(pixelRatio, pixelRatio); diff --git a/superset/assets/yarn.lock b/superset/assets/yarn.lock index e0fc1ea6f194f..317de4f77e3ca 100644 --- a/superset/assets/yarn.lock +++ b/superset/assets/yarn.lock @@ -7114,7 +7114,7 @@ just-extend@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/just-extend/-/just-extend-3.0.0.tgz#cee004031eaabf6406da03a7b84e4fe9d78ef288" -kdbush@^1.0.0, kdbush@^1.0.1: +kdbush@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/kdbush/-/kdbush-1.0.1.tgz#3cbd03e9dead9c0f6f66ccdb96450e5cecc640e0" @@ -11732,18 +11732,12 @@ supercluster@^2.3.0: dependencies: kdbush "^1.0.1" -supercluster@^4.0.1: +supercluster@^4.0.1, supercluster@^4.1.1: version "4.1.1" resolved "https://registry.yarnpkg.com/supercluster/-/supercluster-4.1.1.tgz#cf13c3b28a3fb3db5290bfad7f524e244bd4ce78" dependencies: kdbush "^2.0.1" -"supercluster@https://github.com/georgeke/supercluster/tarball/ac3492737e7ce98e07af679623aad452373bbc40": - version "2.1.0" - resolved "https://github.com/georgeke/supercluster/tarball/ac3492737e7ce98e07af679623aad452373bbc40#3ac2d8efc0224fb6f4332a7192b619ded4b967a6" - dependencies: - kdbush "^1.0.0" - supports-color@3.1.2, supports-color@3.1.x: version "3.1.2" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-3.1.2.tgz#72a262894d9d408b956ca05ff37b2ed8a6e2a2d5" diff --git a/superset/viz.py b/superset/viz.py index 77d8da551abba..0993bc49120d2 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -1960,7 +1960,7 @@ def get_data(self, df): return None fd = self.form_data label_col = fd.get('mapbox_label') - custom_metric = label_col and len(label_col) >= 1 + custom_metric = bool(label_col) and len(label_col) >= 1 metric_col = [None] * len(df.index) if custom_metric: if label_col[0] == fd.get('all_columns_x'): From 55a1955a14b54f14819a68b1eb76921b7b73e654 Mon Sep 17 00:00:00 2001 From: Christine Chambers Date: Mon, 17 Sep 2018 13:57:57 -0700 Subject: [PATCH 2/2] Perform the check for has_custom_metric through `not None` to produce a boolean and rename the field reflect it is a boolean. --- superset/assets/src/visualizations/MapBox/MapBox.jsx | 10 +++++----- superset/viz.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/superset/assets/src/visualizations/MapBox/MapBox.jsx b/superset/assets/src/visualizations/MapBox/MapBox.jsx index 65725dc98e4c1..f2381588e3656 100644 --- a/superset/assets/src/visualizations/MapBox/MapBox.jsx +++ b/superset/assets/src/visualizations/MapBox/MapBox.jsx @@ -76,7 +76,7 @@ class MapBox extends React.Component { pointRadiusUnit, renderWhileDragging, rgb, - customMetric, + hasCustomMetric, bounds, } = this.props; const { viewport } = this.state; @@ -110,7 +110,7 @@ class MapBox extends React.Component { globalOpacity={globalOpacity} compositeOperation={'screen'} renderWhileDragging={renderWhileDragging} - aggregation={customMetric ? aggregatorName : undefined} + aggregation={hasCustomMetric ? aggregatorName : null} lngLatAccessor={(location) => { const coordinates = location.get('geometry').get('coordinates'); return [coordinates.get(0), coordinates.get(1)]; @@ -127,7 +127,7 @@ MapBox.defaultProps = defaultProps; function mapbox(slice, payload, setControlValue) { const { formData, selector } = slice; const { - customMetric, + hasCustomMetric, geoJSON, bounds, mapboxApiKey, @@ -155,7 +155,7 @@ function mapbox(slice, payload, setControlValue) { radius: clusteringRadius, maxZoom: DEFAULT_MAX_ZOOM, }; - if (customMetric) { + if (hasCustomMetric) { opts.initial = () => ({ sum: 0, squaredSum: 0, @@ -185,7 +185,7 @@ function mapbox(slice, payload, setControlValue) { = 1 + has_custom_metric = label_col is not None and len(label_col) > 0 metric_col = [None] * len(df.index) - if custom_metric: + if has_custom_metric: if label_col[0] == fd.get('all_columns_x'): metric_col = df[fd.get('all_columns_x')] elif label_col[0] == fd.get('all_columns_y'): @@ -2003,7 +2003,7 @@ def get_data(self, df): return { 'geoJSON': geo_json, - 'customMetric': custom_metric, + 'hasCustomMetric': has_custom_metric, 'mapboxApiKey': config.get('MAPBOX_API_KEY'), 'mapStyle': fd.get('mapbox_style'), 'aggregatorName': fd.get('pandas_aggfunc'),