-
Notifications
You must be signed in to change notification settings - Fork 11
Open
Description
`h3_pyspark.polyfill` fails when a valid MultiPolygon GeoJSON is provided.
This is expected behavior when using the native h3 library directly.
However, I think it would be helpful if this library could accept MultiPolygons as well.
Could I get permission to open a PR?
implementation in src/h3_pyspark/__init__.py
@F.udf(returnType=T.ArrayType(T.StringType()))
@handle_nulls
def polyfill(polygons, res, geo_json_conformant):
    """Polyfill a GeoJSON Polygon or MultiPolygon given as a JSON string.

    NOTE: unlike the default h3 call, ``polygons`` is expected to be a valid
    GeoJSON *string*; it is parsed with ``json.loads`` before use.

    Args:
        polygons: GeoJSON geometry encoded as a JSON string. Its ``"type"``
            is compared case-insensitively against ``"multipolygon"``.
        res: Resolution forwarded unchanged to ``h3.polyfill``.
        geo_json_conformant: Flag forwarded unchanged to ``h3.polyfill``.

    Returns:
        The result of ``sanitize_types`` applied to the cells produced by
        ``h3.polyfill``. For a MultiPolygon, the cells of every member
        polygon are concatenated in coordinate order.
    """
    geometry = json.loads(polygons)

    # Any geometry other than a MultiPolygon is handed to h3 as-is.
    if geometry["type"].lower() != "multipolygon":
        return sanitize_types(h3.polyfill(geometry, res, geo_json_conformant))

    # Each entry of a MultiPolygon's coordinates is re-wrapped as a
    # standalone Polygon and filled separately.
    cells = [
        cell
        for rings in geometry["coordinates"]
        for cell in h3.polyfill(
            {"type": "Polygon", "coordinates": rings}, res, geo_json_conformant
        )
    ]
    return sanitize_types(cells)
test in tests/test_core.py
# GeoJSON MultiPolygon fixture (as a JSON string) made of two small,
# single-ring rectangular polygons; each ring is closed (first point == last).
multipolygon = '{"type": "MultiPolygon","coordinates": [[[[108.98309290409088,13.240363245242063],[108.98343622684479,13.240363245242063],[108.98343622684479,13.240634779729014],[108.98309290409088,13.240634779729014],[108.98309290409088,13.240363245242063]]],[[[108.98349523544312,13.240002939397714],[108.98389220237732,13.240002939397714],[108.98389220237732,13.240269252464502],[108.98349523544312,13.240269252464502],[108.98349523544312,13.240002939397714]]]]}'
def test_polyfill_multipolygon(self):
    """Check that ``h3_pyspark.polyfill`` handles a MultiPolygon GeoJSON string.

    The expected value is built by polyfilling each member polygon of the
    ``multipolygon`` fixture with native ``h3.polyfill`` and concatenating
    the results; the comparison goes through ``sort`` on both sides.
    """
    # Only the h3_pyspark arguments are used; the native-h3 arguments
    # returned alongside them are not needed in this test.
    # NOTE(review): presumably these are column references matching the keys
    # of the row below -- confirm against get_test_args.
    _, h3_pyspark_test_args = get_test_args(h3.polyfill)

    resolution = 12
    row = {
        "res": resolution,
        "geo_json_conformant": True,
        "geojson": multipolygon,
    }
    df = spark.createDataFrame([row])
    result = df.withColumn("actual", h3_pyspark.polyfill(*h3_pyspark_test_args))
    actual = result.collect()[0]["actual"]

    # Reference result: fill every member polygon independently with h3.
    expected = []
    for rings in json.loads(multipolygon)["coordinates"]:
        part = {"type": "Polygon", "coordinates": rings}
        expected.extend(h3.polyfill(part, resolution, True))
    expected = sanitize_types(expected)

    assert sort(actual) == sort(expected)
nullbutt
Metadata
Metadata
Assignees
Labels
No labels