-
Notifications
You must be signed in to change notification settings - Fork 120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding a function to pick important features by grid #2001
Merged
Merged
Changes from 16 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
19f7184
Adding a function to pick important features by grid
iandees ba251f3
Undo import optimization
iandees 99a5944
Missed a newline
iandees 88a232f
Add a tiny bit of comment about the bucketing
iandees 17b4184
Rename integration test for collision
iandees d6d3980
Sort buckets and take the top instead of bucketing to a single feature
iandees 9635901
Pass through non-point and features that don't match items_matching
iandees 49a13cd
Add some unit tests
iandees 2a05f60
Add a couple more unit tests
iandees f86f5be
Trying integration test
iandees 58a831b
Support multi-sort and reversing
iandees 44a3d80
Make integration test work
iandees 514a549
Merge branch 'master' into dees/keep_n_features_gridded
iandees a4291c7
Fix trailing whitespace
iandees 23786bf
Resolve pre-commit yaml check
iandees d8b7dbe
Add support for grid_height/grid_width as separate values
iandees ab0d108
Add a note about 256 vs 512 sizing
iandees File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -395,6 +395,292 @@ def test_short_name(self): | |
self.assertEquals('foo', props['name:short']) | ||
|
||
|
||
class KeepNGriddedTest(unittest.TestCase): | ||
longMessage=True | ||
|
||
def test_not_points(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Polygon( | ||
[(1, 1), (2, 2), (1, 2), (1, 1)]) | ||
test_shape_2 = shapely.geometry.Polygon( | ||
[(10, 10), (20, 20), (10, 20), (10, 10)]) | ||
features = [ | ||
(test_shape_1, {"foo": "bar"}, "test_shape_1"), | ||
(test_shape_2, {"foo": "bar"}, "test_shape_2"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=1, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "foo"}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEquals(features, output_features, "Non-point features should pass through without modification") | ||
|
||
def test_points_keep_1(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.1, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.1, 1.0)) | ||
features = [ | ||
(test_shape_1, {"foo": "bar"}, "test_shape_1"), | ||
(test_shape_2, {"foo": "bar"}, "test_shape_2"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=1, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "foo"}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEqual(1, len(output_features), "Should consolidate to a single point in the bucket") | ||
self.assertEqual("test_shape_1", output_features[0][2], "All values equal, should pick first one") | ||
|
||
def test_points_keep_1_multisort_second(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.1, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.1, 1.0)) | ||
features = [ | ||
(test_shape_2, {"foo": "bar", "min_zoom": 12.0, "population": 20000}, "test_shape_2"), | ||
(test_shape_1, {"foo": "bar", "min_zoom": 12.0, "population": 10000}, "test_shape_1"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=1, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "min_zoom"}, | ||
{"sort_key": "population", "reverse": True}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEqual(1, len(output_features), "Should consolidate to a single point in the bucket") | ||
self.assertEqual("test_shape_2", output_features[0][2], "Should pick the shape with higher population") | ||
|
||
def test_points_keep_1_multisort_minzoom(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.1, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.1, 1.0)) | ||
features = [ | ||
(test_shape_2, {"foo": "bar", "min_zoom": 12.0, "population": 20000}, "test_shape_2"), | ||
(test_shape_1, {"foo": "bar", "min_zoom": 10.0, "population": 10000}, "test_shape_1"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=1, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "min_zoom"}, | ||
{"sort_key": "population", "reverse": True}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEqual(1, len(output_features), "Should consolidate to a single point in the bucket") | ||
self.assertEqual("test_shape_1", output_features[0][2], "Should pick the shape with lower min_zoom") | ||
|
||
def test_points_keep_1_different_buckets(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.0, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.0, 1.0)) | ||
test_shape_3 = shapely.geometry.Point((75.0, 75.0)) | ||
test_shape_4 = shapely.geometry.Point((25.0, 75.0)) | ||
features = [ | ||
(test_shape_1, {"foo": "bar", "population": 1000}, "test_shape_1"), | ||
(test_shape_2, {"foo": "bar", "population": 2000}, "test_shape_2"), | ||
(test_shape_3, {"foo": "bar", "population": 3000}, "test_shape_3"), | ||
(test_shape_4, {"foo": "bar", "population": 4000}, "test_shape_4"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=1, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "population", "reverse": True}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEqual(3, len(output_features), "Should consolidate to 3 points") | ||
self.assertEqual("test_shape_4", output_features[0][2]) | ||
self.assertEqual("test_shape_2", output_features[1][2]) | ||
self.assertEqual("test_shape_3", output_features[2][2]) | ||
|
||
def test_points_keep_more_than_in_one_bucket(self): | ||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.0, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.0, 1.0)) | ||
test_shape_3 = shapely.geometry.Point((75.0, 75.0)) | ||
test_shape_4 = shapely.geometry.Point((25.0, 75.0)) | ||
features = [ | ||
(test_shape_1, {"foo": "bar", "population": 1000}, "test_shape_1"), | ||
(test_shape_2, {"foo": "bar", "population": 2000}, "test_shape_2"), | ||
(test_shape_3, {"foo": "bar", "population": 3000}, "test_shape_3"), | ||
(test_shape_4, {"foo": "bar", "population": 4000}, "test_shape_4"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=5, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "min_zoom", "reverse": True}, | ||
{"sort_key": "population"}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
layer = keep_n_features_gridded(ctx) | ||
output_features = layer['features'] | ||
self.assertEqual(4, len(output_features), "Should not consolidate because we're keeping top 5") | ||
self.assertEqual("test_shape_4", output_features[0][2]) | ||
self.assertEqual("test_shape_1", output_features[1][2]) | ||
self.assertEqual("test_shape_2", output_features[2][2]) | ||
self.assertEqual("test_shape_3", output_features[3][2]) | ||
|
||
def test_fail_on_non_integer_reverse_sort_key(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. great tests. Really illustrate what this does |
||
from tilequeue.process import Context | ||
import shapely.geometry | ||
|
||
test_shape_1 = shapely.geometry.Point((1.0, 1.0)) | ||
test_shape_2 = shapely.geometry.Point((1.0, 1.0)) | ||
features = [ | ||
(test_shape_1, {"foo": "bar", "population": 1000}, "test_shape_1"), | ||
(test_shape_2, {"foo": "bar", "population": 'error'}, "test_shape_2"), | ||
] | ||
feature_layer = dict( | ||
features=features, | ||
layer_datum=dict(name='test_layer'), | ||
) | ||
feature_layers = [feature_layer] | ||
bounds = (0, 0, 100, 100) | ||
ctx = Context( | ||
feature_layers=feature_layers, | ||
nominal_zoom=0, | ||
unpadded_bounds=bounds, | ||
params=dict( | ||
source_layer="test_layer", | ||
items_matching=dict(foo="bar"), | ||
max_items=5, | ||
grid_width=2, | ||
sorting_keys=[ | ||
{"sort_key": "population", "reverse": True}, | ||
], | ||
), | ||
resources=None, | ||
log=None, | ||
) | ||
from vectordatasource.transform import keep_n_features_gridded | ||
with self.assertRaises(ValueError): | ||
keep_n_features_gridded(ctx) | ||
self.fail("Should raise an exception when reverse-sorting a non-numeric property") | ||
|
||
|
||
class TagsPriorityI18nTest(unittest.TestCase): | ||
|
||
def _call_fut(self, source, kvs): | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I take it all these values for grid width and start/end zoom were arrived at experimentally? Did you try anything like reducing the width but increasing max_items to get a more organic look?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, I did a bit of experimentation based on the suggestions Nathaniel made above and these values seemed to look best. At least in the really dense area around Tokyo.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't try more than one item per bucket. That and wider-than-taller buckets (since labels are wider than taller) are two things we could try in the future I think.