Skip to content

Commit 14053ee

Browse files
authored
feat(query): ST_GEOGRAPHYFROMEWKT (#16302)
* st_geographyfromewkt Signed-off-by: coldWater <forsaken628@gmail.com> * TypeName Signed-off-by: coldWater <forsaken628@gmail.com> * test and fix Signed-off-by: coldWater <forsaken628@gmail.com> * fix test Signed-off-by: coldWater <forsaken628@gmail.com> --------- Signed-off-by: coldWater <forsaken628@gmail.com>
1 parent 6bb25f2 commit 14053ee

File tree

26 files changed

+264
-59
lines changed

26 files changed

+264
-59
lines changed

src/common/io/src/geography.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use databend_common_exception::ErrorCode;
16+
use databend_common_exception::Result;
17+
use geo::CoordsIter;
18+
use geo::Geometry;
19+
use geos::wkt::TryFromWkt;
20+
use geozero::ToWkb;
21+
22+
/// Lower inclusive bound that `check_point` accepts for a longitude value.
pub const LONGITUDE_MIN: f64 = -180.0;
23+
/// Upper inclusive bound that `check_point` accepts for a longitude value.
pub const LONGITUDE_MAX: f64 = 180.0;
24+
/// Lower inclusive bound that `check_point` accepts for a latitude value.
pub const LATITUDE_MIN: f64 = -90.0;
25+
/// Upper inclusive bound that `check_point` accepts for a latitude value.
pub const LATITUDE_MAX: f64 = 90.0;
26+
27+
use super::geometry::cut_srid;
28+
29+
pub fn geography_from_ewkt_bytes(ewkt: &[u8]) -> Result<Vec<u8>> {
30+
let s = std::str::from_utf8(ewkt).map_err(|e| ErrorCode::GeometryError(e.to_string()))?;
31+
geography_from_ewkt(s)
32+
}
33+
34+
pub fn geography_from_ewkt(ewkt: &str) -> Result<Vec<u8>> {
35+
let (srid, wkt) = cut_srid(ewkt)?;
36+
let geog: Geometry<f64> =
37+
Geometry::try_from_wkt_str(wkt).map_err(|e| ErrorCode::GeometryError(e.to_string()))?;
38+
geog.coords_iter().try_for_each(|c| check_point(c.x, c.y))?;
39+
geog.to_ewkb(geozero::CoordDimensions::xy(), srid)
40+
.map_err(ErrorCode::from)
41+
}
42+
43+
pub fn check_point(lon: f64, lat: f64) -> Result<()> {
44+
if !(LONGITUDE_MIN..=LONGITUDE_MAX).contains(&lon) {
45+
return Err(ErrorCode::GeometryError(
46+
"longitude is out of range".to_string(),
47+
));
48+
}
49+
if !(LATITUDE_MIN..=LATITUDE_MAX).contains(&lat) {
50+
return Err(ErrorCode::GeometryError(
51+
"latitude is out of range".to_string(),
52+
));
53+
}
54+
Ok(())
55+
}

src/common/io/src/geometry.rs

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -80,29 +80,40 @@ impl Display for GeometryDataType {
8080
}
8181
}
8282

83-
pub fn parse_to_ewkb(buf: &[u8], srid: Option<i32>) -> Result<Vec<u8>> {
84-
let wkt = std::str::from_utf8(buf).map_err(|e| ErrorCode::GeometryError(e.to_string()))?;
85-
let input_wkt = wkt.trim().to_ascii_uppercase();
86-
87-
let parts: Vec<&str> = input_wkt.split(';').collect();
88-
89-
let parsed_srid: Option<i32> = srid.or_else(|| {
90-
if input_wkt.starts_with("SRID=") && parts.len() == 2 {
91-
parts[0].replace("SRID=", "").parse().ok()
92-
} else {
93-
None
94-
}
95-
});
96-
97-
let geo_part = if parts.len() == 2 { parts[1] } else { parts[0] };
83+
pub fn parse_bytes_to_ewkb(buf: &[u8], srid: Option<i32>) -> Result<Vec<u8>> {
84+
let s = std::str::from_utf8(buf).map_err(|e| ErrorCode::GeometryError(e.to_string()))?;
85+
parse_to_ewkb(s, srid)
86+
}
9887

88+
/// Parses EWKT text in `buf` and serializes the geometry as EWKB bytes.
///
/// An `SRID=...;` prefix, if present, is split off with `cut_srid`. The SRID
/// written to the output is the explicit `srid` argument when it is `Some`,
/// otherwise the SRID parsed from the prefix (which may itself be `None`).
///
/// # Errors
///
/// * the error from `cut_srid` when the prefix is malformed — this is checked
///   before `srid` is consulted, so it applies even when `srid` is `Some`;
/// * `ErrorCode::GeometryError` with the parser message when the WKT part
///   cannot be parsed;
/// * the EWKB serialization error, converted through `ErrorCode::from`.
pub fn parse_to_ewkb(buf: &str, srid: Option<i32>) -> Result<Vec<u8>> {
89+
let (parsed_srid, geo_part) = cut_srid(buf)?;
90+
// The caller-supplied SRID wins; the embedded one is only a fallback.
let srid = srid.or(parsed_srid);
9991
let geom: Geometry<f64> = Geometry::try_from_wkt_str(geo_part)
10092
.map_err(|e| ErrorCode::GeometryError(e.to_string()))?;
10193

102-
geom.to_ewkb(CoordDimensions::xy(), parsed_srid)
94+
geom.to_ewkb(CoordDimensions::xy(), srid)
10395
.map_err(ErrorCode::from)
10496
}
10597

98+
pub fn cut_srid(ewkt: &str) -> Result<(Option<i32>, &str)> {
99+
match ewkt.find(';') {
100+
None => Ok((None, ewkt)),
101+
Some(idx) => {
102+
let prefix = ewkt[..idx].trim();
103+
match prefix
104+
.strip_prefix("SRID=")
105+
.or_else(|| prefix.strip_prefix("srid="))
106+
.and_then(|srid| srid.trim().parse::<i32>().ok())
107+
{
108+
Some(srid) => Ok((Some(srid), &ewkt[idx + 1..])),
109+
None => Err(ErrorCode::GeometryError(format!(
110+
"invalid EWKT with prefix {prefix}"
111+
))),
112+
}
113+
}
114+
}
115+
}
116+
106117
/// An enum representing any possible geometry subtype.
107118
///
108119
/// WKB/EWKB: start with 01/00(1bit)

src/common/io/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ pub mod cursor_ext;
3838
mod decimal;
3939
mod escape;
4040
mod format_settings;
41-
mod geometry;
41+
pub mod geography;
42+
pub mod geometry;
4243
mod position;
4344
mod stat_buffer;
4445
pub mod wkb;
@@ -50,6 +51,7 @@ pub use decimal::display_decimal_256;
5051
pub use escape::escape_string;
5152
pub use escape::escape_string_with_quote;
5253
pub use geometry::geometry_format;
54+
pub use geometry::parse_bytes_to_ewkb;
5355
pub use geometry::parse_to_ewkb;
5456
pub use geometry::parse_to_subtype;
5557
pub use geometry::Axis;

src/query/ast/src/ast/expr.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,7 @@ pub enum TypeName {
940940
},
941941
Variant,
942942
Geometry,
943+
Geography,
943944
Nullable(Box<TypeName>),
944945
NotNull(Box<TypeName>),
945946
}
@@ -1059,6 +1060,9 @@ impl Display for TypeName {
10591060
TypeName::Geometry => {
10601061
write!(f, "GEOMETRY")?;
10611062
}
1063+
TypeName::Geography => {
1064+
write!(f, "GEOGRAPHY")?;
1065+
}
10621066
TypeName::Nullable(ty) => {
10631067
write!(f, "{} NULL", ty)?;
10641068
}

src/query/ast/src/parser/expr.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,7 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
16211621
);
16221622
let ty_variant = value(TypeName::Variant, rule! { VARIANT | JSON });
16231623
let ty_geometry = value(TypeName::Geometry, rule! { GEOMETRY });
1624+
let ty_geography = value(TypeName::Geography, rule! { GEOGRAPHY });
16241625
map_res(
16251626
alt((
16261627
rule! {
@@ -1650,6 +1651,7 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
16501651
| #ty_string
16511652
| #ty_variant
16521653
| #ty_geometry
1654+
| #ty_geography
16531655
| #ty_nullable
16541656
) ~ #nullable? : "type name" },
16551657
)),

src/query/ast/src/parser/token.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,8 @@ pub enum TokenKind {
642642
GENERATED,
643643
#[token("GEOMETRY", ignore(ascii_case))]
644644
GEOMETRY,
645+
#[token("GEOGRAPHY", ignore(ascii_case))]
646+
GEOGRAPHY,
645647
#[token("GLOBAL", ignore(ascii_case))]
646648
GLOBAL,
647649
#[token("GRAPH", ignore(ascii_case))]

src/query/ast/tests/it/testdata/expr-error.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ error:
2929
--> SQL:1:14
3030
|
3131
1 | CAST(col1 AS foo)
32-
| ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `GEOMETRY`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE`
32+
| ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE`
3333
| |
3434
| while parsing `CAST(... AS ...)`
3535
| while parsing expression

src/query/ast/tests/it/testdata/stmt-error.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ error:
2929
--> SQL:1:19
3030
|
3131
1 | create table a (c varch)
32-
| ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, `JSON`, or `GEOMETRY`
32+
| ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `GEOGRAPHY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, `JSON`, or `GEOMETRY`
3333
| | |
3434
| | while parsing `<column name> <type> [DEFAULT <expr>] [AS (<expr>) VIRTUAL] [AS (<expr>) STORED] [COMMENT '<comment>']`
3535
| while parsing `CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [<database>.]<table> [<source>] [<table_options>]`
@@ -42,7 +42,7 @@ error:
4242
--> SQL:1:25
4343
|
4444
1 | create table a (c tuple())
45-
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `NULLABLE`, <Ident>, <LiteralString>, or `IDENTIFIER`
45+
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, <Ident>, <LiteralString>, or `IDENTIFIER`
4646
| | | |
4747
| | | while parsing type name
4848
| | while parsing `<column name> <type> [DEFAULT <expr>] [AS (<expr>) VIRTUAL] [AS (<expr>) STORED] [COMMENT '<comment>']`
@@ -70,7 +70,7 @@ error:
7070
--> SQL:1:38
7171
|
7272
1 | create table a (b tuple(c int, uint64));
73-
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, or `NULLABLE`
73+
| ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, or `NULLABLE`
7474
| | | |
7575
| | | while parsing TUPLE(<name> <type>, ...)
7676
| | | while parsing type name
@@ -946,7 +946,7 @@ error:
946946
| ------ while parsing `CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] <dictionary_name> [(<column>, ...)] PRIMARY KEY [<primary_key>, ...] SOURCE (<source_name> ([<source_options>])) [COMMENT <comment>] `
947947
2 | (
948948
3 | user_name tuple(),
949-
| --------- ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `NULLABLE`, <Ident>, <LiteralString>, or `IDENTIFIER`
949+
| --------- ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `GEOGRAPHY`, `NULLABLE`, <Ident>, <LiteralString>, or `IDENTIFIER`
950950
| | |
951951
| | while parsing type name
952952
| while parsing `<column name> <type> [DEFAULT <expr>] [AS (<expr>) VIRTUAL] [AS (<expr>) STORED] [COMMENT '<comment>']`

src/query/expression/src/types/geography.rs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::ops::Range;
1919
use borsh::BorshDeserialize;
2020
use borsh::BorshSerialize;
2121
use databend_common_exception::Result;
22+
use databend_common_io::geography::*;
2223
use databend_common_io::wkb::make_point;
2324
use databend_common_io::wkb::read_wkb_header;
2425
pub use databend_common_io::wkb::WkbInfo;
@@ -42,11 +43,6 @@ use crate::values::Scalar;
4243
use crate::values::ScalarRef;
4344
use crate::ColumnBuilder;
4445

45-
pub const LONGITUDE_MIN: f64 = -180.0;
46-
pub const LONGITUDE_MAX: f64 = 180.0;
47-
pub const LATITUDE_MIN: f64 = -90.0;
48-
pub const LATITUDE_MAX: f64 = 90.0;
49-
5046
#[derive(
5147
Clone,
5248
Default,
@@ -96,14 +92,8 @@ impl<'a> BinaryLike<'a> for GeographyRef<'a> {
9692
pub struct GeographyType;
9793

9894
impl GeographyType {
99-
pub fn check_point(lon: f64, lat: f64) -> Result<(), String> {
100-
if !(LONGITUDE_MIN..=LONGITUDE_MAX).contains(&lon)
101-
|| !(LATITUDE_MIN..=LATITUDE_MAX).contains(&lat)
102-
{
103-
Err("latitude is out of range".to_string())
104-
} else {
105-
Ok(())
106-
}
95+
/// Validates a longitude/latitude pair.
///
/// Thin wrapper that forwards both arguments to the free `check_point`
/// function (presumably the one brought into scope by the
/// `databend_common_io::geography::*` glob import) and returns its result
/// unchanged.
pub fn check_point(lon: f64, lat: f64) -> Result<()> {
96+
check_point(lon, lat)
10797
}
10898

10999
pub fn point(lon: f64, lat: f64) -> Geography {

src/query/expression/src/values.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,7 @@ impl Column {
13351335
GeometryType::from_data(data)
13361336
}
13371337
DataType::Geography => {
1338-
use crate::types::geography;
1338+
use databend_common_io::geography;
13391339

13401340
let mut builder = GeographyType::create_builder(len, &[]);
13411341
for _ in 0..len {

0 commit comments

Comments
 (0)