Skip to content

Commit d24996b

Browse files
kkk25641463b41sh
andauthored
feat(query): ST_TRANSFORM (#15992)
* ST_TRANSFORM Signed-off-by: Fan Yang <yangfanlinux@gmail.com> * ST_TRANSFORM Signed-off-by: Fan Yang <yangfanlinux@gmail.com> * tmp test * try fix * try fix * use proj4rs * fix --------- Signed-off-by: Fan Yang <yangfanlinux@gmail.com> Co-authored-by: b41sh <baishen2009@gmail.com>
1 parent 5240970 commit d24996b

File tree

8 files changed

+466
-201
lines changed

8 files changed

+466
-201
lines changed

Cargo.lock

Lines changed: 119 additions & 61 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ parquet = { version = "52", features = ["async"] }
290290
paste = "1.0.15"
291291
# TODO: let's use native tls instead.
292292
poem = { version = "3.0", features = ["openssl-tls", "multipart", "compression"] }
293+
proj4rs = { version = "0.1.3", features = ["geo-types", "crs-definitions"] }
293294
prometheus-client = "0.22"
294295
prost = { version = "0.12.1" }
295296
prost-build = { version = "0.12.1" }

src/query/functions/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ ordered-float = { workspace = true, features = [
5151
"serde",
5252
"rand",
5353
] }
54+
proj4rs = { workspace = true }
5455
rand = { workspace = true }
5556
regex = { workspace = true }
5657
roaring = "0.10.1"

src/query/functions/src/scalars/geometry.rs

Lines changed: 228 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use databend_common_expression::types::VariantType;
2525
use databend_common_expression::types::F64;
2626
use databend_common_expression::vectorize_with_builder_1_arg;
2727
use databend_common_expression::vectorize_with_builder_2_arg;
28+
use databend_common_expression::vectorize_with_builder_3_arg;
2829
use databend_common_expression::vectorize_with_builder_4_arg;
2930
use databend_common_expression::FunctionDomain;
3031
use databend_common_expression::FunctionRegistry;
@@ -42,6 +43,9 @@ use geo::EuclideanLength;
4243
use geo::HasDimensions;
4344
use geo::HaversineDistance;
4445
use geo::Point;
46+
use geo::ToDegrees;
47+
use geo::ToRadians;
48+
use geo_types::coord;
4549
use geo_types::Polygon;
4650
use geohash::decode_bbox;
4751
use geohash::encode;
@@ -62,6 +66,8 @@ use geozero::ToWkt;
6266
use jsonb::parse_value;
6367
use jsonb::to_string;
6468
use num_traits::AsPrimitive;
69+
use proj4rs::transform::transform;
70+
use proj4rs::Proj;
6571

6672
pub fn register(registry: &mut FunctionRegistry) {
6773
// aliases
@@ -1502,109 +1508,112 @@ pub fn register(registry: &mut FunctionRegistry) {
15021508
),
15031509
);
15041510

1505-
// registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
1506-
// "st_transform",
1507-
// |_, _, _| FunctionDomain::MayThrow,
1508-
// vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
1509-
// |original, srid, builder, ctx| {
1510-
// if let Some(validity) = &ctx.validity {
1511-
// if !validity.get_bit(builder.len()) {
1512-
// builder.commit_row();
1513-
// return;
1514-
// }
1515-
// }
1516-
//
1517-
// #[allow(unused_assignments)]
1518-
// let mut from_srid = 0;
1519-
//
1520-
// // All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
1521-
// // Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
1522-
// match read_ewkb_srid(&mut std::io::Cursor::new(original)) {
1523-
// Ok(srid) if srid.is_some() => from_srid = srid.unwrap(),
1524-
// _ => {
1525-
// ctx.set_error(
1526-
// builder.len(),
1527-
// ErrorCode::GeometryError(" input geometry must has the correct SRID")
1528-
// .to_string(),
1529-
// );
1530-
// builder.commit_row();
1531-
// return;
1532-
// }
1533-
// }
1534-
//
1535-
// let result = {
1536-
// Ewkb(original).to_geo().map_err(ErrorCode::from).and_then(
1537-
// |mut geom: Geometry| {
1538-
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(srid), None)
1539-
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
1540-
// .and_then(|proj| {
1541-
// geom.transform(&proj)
1542-
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
1543-
// .and_then(|_| {
1544-
// geom.to_ewkb(geom.dims(), Some(srid))
1545-
// .map_err(ErrorCode::from)
1546-
// })
1547-
// })
1548-
// },
1549-
// )
1550-
// };
1551-
//
1552-
// match result {
1553-
// Ok(data) => {
1554-
// builder.put_slice(data.as_slice());
1555-
// }
1556-
// Err(e) => {
1557-
// ctx.set_error(builder.len(), e.to_string());
1558-
// }
1559-
// }
1560-
//
1561-
// builder.commit_row();
1562-
// },
1563-
// ),
1564-
// );
1565-
//
1566-
// registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
1567-
// "st_transform",
1568-
// |_, _, _,_| FunctionDomain::MayThrow,
1569-
// vectorize_with_builder_3_arg::<GeometryType, Int32Type,Int32Type, GeometryType>(
1570-
// |original, from_srid, to_srid, builder, ctx| {
1571-
// if let Some(validity) = &ctx.validity {
1572-
// if !validity.get_bit(builder.len()) {
1573-
// builder.commit_row();
1574-
// return;
1575-
// }
1576-
// }
1577-
//
1578-
// let result = {
1579-
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(to_srid), None)
1580-
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
1581-
// .and_then(|proj| {
1582-
// let old = Ewkb(original.to_vec());
1583-
// Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap()).to_geo().map_err(ErrorCode::from).and_then(|mut geom| {
1584-
// geom.transform(&proj).map_err(|e|ErrorCode::GeometryError(e.to_string())).and_then(|_| {
1585-
// geom.to_ewkb(old.dims(), Some(to_srid)).map_err(ErrorCode::from)
1586-
// })
1587-
// })
1588-
// })
1589-
// };
1590-
// match result {
1591-
// Ok(data) => {
1592-
// builder.put_slice(data.as_slice());
1593-
// }
1594-
// Err(e) => {
1595-
// ctx.set_error(builder.len(), e.to_string());
1596-
// }
1597-
// }
1598-
//
1599-
// builder.commit_row();
1600-
// },
1601-
// ),
1602-
// );
1511+
registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
1512+
"st_transform",
1513+
|_, _, _| FunctionDomain::MayThrow,
1514+
vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
1515+
|original, to_srid, builder, ctx| {
1516+
if let Some(validity) = &ctx.validity {
1517+
if !validity.get_bit(builder.len()) {
1518+
builder.commit_row();
1519+
return;
1520+
}
1521+
}
1522+
1523+
// All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
1524+
// Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
1525+
let from_srid = match Ewkb(original).to_geos().unwrap().srid() {
1526+
Some(srid) => srid,
1527+
_ => {
1528+
ctx.set_error(
1529+
builder.len(),
1530+
ErrorCode::GeometryError("input geometry must has the correct SRID")
1531+
.to_string(),
1532+
);
1533+
builder.commit_row();
1534+
return;
1535+
}
1536+
};
1537+
1538+
match st_transform_impl(original, from_srid, to_srid) {
1539+
Ok(data) => {
1540+
builder.put_slice(data.as_slice());
1541+
}
1542+
Err(e) => {
1543+
ctx.set_error(builder.len(), e.to_string());
1544+
}
1545+
}
1546+
1547+
builder.commit_row();
1548+
},
1549+
),
1550+
);
1551+
1552+
registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
1553+
"st_transform",
1554+
|_, _, _,_| FunctionDomain::MayThrow,
1555+
vectorize_with_builder_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType>(
1556+
|original, from_srid, to_srid, builder, ctx| {
1557+
if let Some(validity) = &ctx.validity {
1558+
if !validity.get_bit(builder.len()) {
1559+
builder.commit_row();
1560+
return;
1561+
}
1562+
}
1563+
1564+
match st_transform_impl(original, from_srid, to_srid) {
1565+
Ok(data) => {
1566+
builder.put_slice(data.as_slice());
1567+
}
1568+
Err(e) => {
1569+
ctx.set_error(builder.len(), e.to_string());
1570+
}
1571+
}
1572+
1573+
builder.commit_row();
1574+
},
1575+
),
1576+
);
16031577
}
16041578

1605-
// fn make_crs(srid: i32) -> String {
1606-
// format!("EPSG:{}", srid)
1607-
// }
1579+
fn st_transform_impl(
1580+
original: &[u8],
1581+
from_srid: i32,
1582+
to_srid: i32,
1583+
) -> databend_common_exception::Result<Vec<u8>> {
1584+
let from_proj = Proj::from_epsg_code(
1585+
u16::try_from(from_srid).map_err(|_| ErrorCode::GeometryError("invalid from srid"))?,
1586+
)
1587+
.map_err(|_| ErrorCode::GeometryError("invalid from srid"))?;
1588+
let to_proj = Proj::from_epsg_code(
1589+
u16::try_from(to_srid).map_err(|_| ErrorCode::GeometryError("invalid to srid"))?,
1590+
)
1591+
.map_err(|_| ErrorCode::GeometryError("invalid to srid"))?;
1592+
1593+
let old = Ewkb(original.to_vec());
1594+
Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap())
1595+
.to_geo()
1596+
.map_err(ErrorCode::from)
1597+
.and_then(|mut geom| {
1598+
// EPSG:4326 WGS84 in proj4rs is in radians, not degrees.
1599+
if from_srid == 4326 {
1600+
geom.to_radians_in_place();
1601+
}
1602+
transform(&from_proj, &to_proj, &mut geom).map_err(|_| {
1603+
ErrorCode::GeometryError(format!(
1604+
"transform from {} to {} failed",
1605+
from_srid, to_srid
1606+
))
1607+
})?;
1608+
if to_srid == 4326 {
1609+
geom.to_degrees_in_place();
1610+
}
1611+
let round_geom = round_geometry_coordinates(geom);
1612+
round_geom
1613+
.to_ewkb(round_geom.dims(), Some(to_srid))
1614+
.map_err(ErrorCode::from)
1615+
})
1616+
}
16081617

16091618
#[inline]
16101619
fn get_shared_srid(geometries: &Vec<Geometry>) -> Result<Option<i32>, String> {
@@ -1895,3 +1904,121 @@ fn count_points(geom: &geo_types::Geometry) -> usize {
18951904
geo_types::Geometry::Triangle(_) => 4,
18961905
}
18971906
}
1907+
1908+
/// The last three decimal places of the f64 type are inconsistent between aarch64 and x86 platforms,
1909+
/// causing unit test results to fail. We will only retain six decimal places.
1910+
fn round_geometry_coordinates(geom: geo::Geometry<f64>) -> geo::Geometry<f64> {
1911+
fn round_coordinate(coord: f64) -> f64 {
1912+
(coord * 1_000_000.0).round() / 1_000_000.0
1913+
}
1914+
1915+
match geom {
1916+
geo::Geometry::Point(point) => geo::Geometry::Point(Point::new(
1917+
round_coordinate(point.x()),
1918+
round_coordinate(point.y()),
1919+
)),
1920+
geo::Geometry::LineString(linestring) => geo::Geometry::LineString(
1921+
linestring
1922+
.into_iter()
1923+
.map(|coord| coord!(x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
1924+
.collect(),
1925+
),
1926+
geo::Geometry::Polygon(polygon) => {
1927+
let outer_ring = polygon.exterior();
1928+
let mut rounded_inner_rings = Vec::new();
1929+
1930+
for inner_ring in polygon.interiors() {
1931+
let rounded_coords: Vec<Coord<f64>> = inner_ring
1932+
.into_iter()
1933+
.map(
1934+
|coord| coord!( x: round_coordinate(coord.x), y: round_coordinate(coord.y)),
1935+
)
1936+
.collect();
1937+
rounded_inner_rings.push(LineString(rounded_coords));
1938+
}
1939+
1940+
let rounded_polygon = Polygon::new(
1941+
LineString(
1942+
outer_ring
1943+
.into_iter()
1944+
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
1945+
.collect(),
1946+
),
1947+
rounded_inner_rings,
1948+
);
1949+
1950+
geo::Geometry::Polygon(rounded_polygon)
1951+
}
1952+
geo::Geometry::MultiPoint(multipoint) => geo::Geometry::MultiPoint(
1953+
multipoint
1954+
.into_iter()
1955+
.map(|point| Point::new(round_coordinate(point.x()), round_coordinate(point.y())))
1956+
.collect(),
1957+
),
1958+
geo::Geometry::MultiLineString(multilinestring) => {
1959+
let rounded_lines: Vec<LineString<f64>> = multilinestring
1960+
.into_iter()
1961+
.map(|linestring| {
1962+
LineString(
1963+
linestring
1964+
.into_iter()
1965+
.map(|coord| coord!(x: round_coordinate(coord.x), y: round_coordinate(coord.y)))
1966+
.collect(),
1967+
)
1968+
})
1969+
.collect();
1970+
1971+
geo::Geometry::MultiLineString(geo::MultiLineString::new(rounded_lines))
1972+
}
1973+
geo::Geometry::MultiPolygon(multipolygon) => {
1974+
let rounded_polygons: Vec<Polygon<f64>> = multipolygon
1975+
.into_iter()
1976+
.map(|polygon| {
1977+
let outer_ring = polygon.exterior().into_iter()
1978+
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
1979+
.collect::<Vec<Coord<f64>>>();
1980+
1981+
let mut rounded_inner_rings = Vec::new();
1982+
for inner_ring in polygon.interiors() {
1983+
let rounded_coords: Vec<Coord<f64>> = inner_ring
1984+
.into_iter()
1985+
.map(|coord| coord!( x:round_coordinate(coord.x), y: coord.y))
1986+
.collect();
1987+
rounded_inner_rings.push(LineString(rounded_coords));
1988+
}
1989+
1990+
Polygon::new(LineString(outer_ring), rounded_inner_rings)
1991+
})
1992+
.collect();
1993+
geo::Geometry::MultiPolygon(geo::MultiPolygon::new(rounded_polygons))
1994+
}
1995+
geo::Geometry::GeometryCollection(geometrycollection) => geo::Geometry::GeometryCollection(
1996+
geometrycollection
1997+
.into_iter()
1998+
.map(round_geometry_coordinates)
1999+
.collect(),
2000+
),
2001+
geo::Geometry::Line(line) => geo::Geometry::Line(geo::Line::new(
2002+
Point::new(
2003+
round_coordinate(line.start.x),
2004+
round_coordinate(line.start.y),
2005+
),
2006+
Point::new(round_coordinate(line.end.x), round_coordinate(line.end.y)),
2007+
)),
2008+
geo::Geometry::Rect(rect) => geo::Geometry::Rect(geo::Rect::new(
2009+
Point::new(
2010+
round_coordinate(rect.min().x),
2011+
round_coordinate(rect.min().y),
2012+
),
2013+
Point::new(
2014+
round_coordinate(rect.max().x),
2015+
round_coordinate(rect.max().y),
2016+
),
2017+
)),
2018+
geo::Geometry::Triangle(triangle) => geo::Geometry::Triangle(geo::Triangle::new(
2019+
coord!(x: round_coordinate(triangle.0.x), y: round_coordinate(triangle.0.y)),
2020+
coord!(x: round_coordinate(triangle.1.x), y: round_coordinate(triangle.1.y)),
2021+
coord!(x: round_coordinate(triangle.2.x), y: round_coordinate(triangle.2.y)),
2022+
)),
2023+
}
2024+
}

0 commit comments

Comments
 (0)