Skip to content

Commit d00c00a

Browse files
committed
Expose missing functions to Python
1 parent a3908ed commit d00c00a

File tree

6 files changed

+39
-4
lines changed

6 files changed

+39
-4
lines changed

src/common.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
2727
m.add_class::<df_schema::PyDFSchema>()?;
2828
m.add_class::<data_type::PyDataType>()?;
2929
m.add_class::<data_type::DataTypeMap>()?;
30+
m.add_class::<data_type::RexType>()?;
3031
m.add_class::<data_type::PythonType>()?;
3132
m.add_class::<data_type::SqlType>()?;
3233
m.add_class::<data_type::NullTreatment>()?;

src/common/data_type.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ pub enum SqlType {
764764
#[allow(non_camel_case_types)]
765765
#[allow(clippy::upper_case_acronyms)]
766766
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
767-
#[pyclass(name = "PythonType", module = "datafusion.common")]
767+
#[pyclass(name = "NullTreatment", module = "datafusion.common")]
768768
pub enum NullTreatment {
769769
IGNORE_NULLS,
770770
RESPECT_NULLS,

src/dataframe.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,18 @@ impl PyDataFrame {
320320
Ok(Self::new(df))
321321
}
322322

323+
#[pyo3(signature = (columns, preserve_nulls=true))]
324+
fn unnest_columns(&self, columns: Vec<String>, preserve_nulls: bool) -> PyResult<Self> {
325+
let unnest_options = UnnestOptions { preserve_nulls };
326+
let cols = columns.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
327+
let df = self
328+
.df
329+
.as_ref()
330+
.clone()
331+
.unnest_columns_with_options(&cols, unnest_options)?;
332+
Ok(Self::new(df))
333+
}
334+
323335
/// Calculate the intersection of two `DataFrame`s. The two `DataFrame`s must have exactly the same schema.
324336
fn intersect(&self, py_df: PyDataFrame) -> PyResult<Self> {
325337
let new_df = self

src/expr.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
583583
m.add_class::<placeholder::PyPlaceholder>()?;
584584
m.add_class::<grouping_set::PyGroupingSet>()?;
585585
m.add_class::<case::PyCase>()?;
586+
m.add_class::<conditional_expr::PyCaseBuilder>()?;
586587
m.add_class::<cast::PyCast>()?;
587588
m.add_class::<cast::PyTryCast>()?;
588589
m.add_class::<between::PyBetween>()?;

src/functions.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ fn concat_ws(sep: String, args: Vec<PyExpr>) -> PyResult<PyExpr> {
232232
Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into())
233233
}
234234

235+
#[pyfunction]
236+
#[pyo3(signature = (values, regex, flags = None))]
237+
fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option<PyExpr>) -> PyResult<PyExpr> {
238+
Ok(functions::expr_fn::regexp_like(values.expr, regex.expr, flags.map(|x| x.expr)).into())
239+
}
240+
235241
#[pyfunction]
236242
#[pyo3(signature = (values, regex, flags = None))]
237243
fn regexp_match(values: PyExpr, regex: PyExpr, flags: Option<PyExpr>) -> PyResult<PyExpr> {
@@ -256,12 +262,12 @@ fn regexp_replace(
256262
}
257263
/// Creates a new Sort Expr
258264
#[pyfunction]
259-
fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyResult<PyExpr> {
265+
fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult<PyExpr> {
260266
Ok(PyExpr {
261267
expr: datafusion_expr::Expr::Sort(Sort {
262268
expr: Box::new(expr.expr),
263-
asc: asc.unwrap_or(true),
264-
nulls_first: nulls_first.unwrap_or(true),
269+
asc,
270+
nulls_first,
265271
}),
266272
})
267273
}
@@ -488,6 +494,7 @@ expr_fn!(chr, arg, "Returns the character with the given code.");
488494
expr_fn_vec!(coalesce);
489495
expr_fn!(cos, num);
490496
expr_fn!(cosh, num);
497+
expr_fn!(cot, num);
491498
expr_fn!(degrees, num);
492499
expr_fn!(decode, input encoding);
493500
expr_fn!(encode, input encoding);
@@ -499,6 +506,7 @@ expr_fn!(gcd, x y);
499506
expr_fn!(initcap, string, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.");
500507
expr_fn!(isnan, num);
501508
expr_fn!(iszero, num);
509+
expr_fn!(levenshtein, string1 string2);
502510
expr_fn!(lcm, x y);
503511
expr_fn!(left, string n, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters.");
504512
expr_fn!(ln, num);
@@ -555,7 +563,9 @@ expr_fn!(sqrt, num);
555563
expr_fn!(starts_with, string prefix, "Returns true if string starts with prefix.");
556564
expr_fn!(strpos, string substring, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)");
557565
expr_fn!(substr, string position);
566+
expr_fn!(substr_index, string delimiter count);
558567
expr_fn!(substring, string position length);
568+
expr_fn!(find_in_set, string string_list);
559569
expr_fn!(tan, num);
560570
expr_fn!(tanh, num);
561571
expr_fn!(
@@ -568,13 +578,15 @@ expr_fn_vec!(to_timestamp);
568578
expr_fn_vec!(to_timestamp_millis);
569579
expr_fn_vec!(to_timestamp_micros);
570580
expr_fn_vec!(to_timestamp_seconds);
581+
expr_fn_vec!(to_unixtime);
571582
expr_fn!(current_date);
572583
expr_fn!(current_time);
573584
expr_fn!(date_part, part date);
574585
expr_fn!(datepart, date_part, part date);
575586
expr_fn!(date_trunc, part date);
576587
expr_fn!(datetrunc, date_trunc, part date);
577588
expr_fn!(date_bin, stride source origin);
589+
expr_fn!(make_date, year month day);
578590

579591
expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
580592
expr_fn_vec!(trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string.");
@@ -712,6 +724,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
712724
m.add_wrapped(wrap_pyfunction!(corr))?;
713725
m.add_wrapped(wrap_pyfunction!(cos))?;
714726
m.add_wrapped(wrap_pyfunction!(cosh))?;
727+
m.add_wrapped(wrap_pyfunction!(cot))?;
715728
m.add_wrapped(wrap_pyfunction!(count))?;
716729
m.add_wrapped(wrap_pyfunction!(count_star))?;
717730
m.add_wrapped(wrap_pyfunction!(covar))?;
@@ -725,6 +738,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
725738
m.add_wrapped(wrap_pyfunction!(date_part))?;
726739
m.add_wrapped(wrap_pyfunction!(datetrunc))?;
727740
m.add_wrapped(wrap_pyfunction!(date_trunc))?;
741+
m.add_wrapped(wrap_pyfunction!(make_date))?;
728742
m.add_wrapped(wrap_pyfunction!(digest))?;
729743
m.add_wrapped(wrap_pyfunction!(ends_with))?;
730744
m.add_wrapped(wrap_pyfunction!(exp))?;
@@ -737,6 +751,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
737751
m.add_wrapped(wrap_pyfunction!(initcap))?;
738752
m.add_wrapped(wrap_pyfunction!(isnan))?;
739753
m.add_wrapped(wrap_pyfunction!(iszero))?;
754+
m.add_wrapped(wrap_pyfunction!(levenshtein))?;
740755
m.add_wrapped(wrap_pyfunction!(lcm))?;
741756
m.add_wrapped(wrap_pyfunction!(left))?;
742757
m.add_wrapped(wrap_pyfunction!(length))?;
@@ -764,6 +779,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
764779
m.add_wrapped(wrap_pyfunction!(pow))?;
765780
m.add_wrapped(wrap_pyfunction!(radians))?;
766781
m.add_wrapped(wrap_pyfunction!(random))?;
782+
m.add_wrapped(wrap_pyfunction!(regexp_like))?;
767783
m.add_wrapped(wrap_pyfunction!(regexp_match))?;
768784
m.add_wrapped(wrap_pyfunction!(regexp_replace))?;
769785
m.add_wrapped(wrap_pyfunction!(repeat))?;
@@ -789,7 +805,9 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
789805
m.add_wrapped(wrap_pyfunction!(strpos))?;
790806
m.add_wrapped(wrap_pyfunction!(r#struct))?; // Use raw identifier since struct is a keyword
791807
m.add_wrapped(wrap_pyfunction!(substr))?;
808+
m.add_wrapped(wrap_pyfunction!(substr_index))?;
792809
m.add_wrapped(wrap_pyfunction!(substring))?;
810+
m.add_wrapped(wrap_pyfunction!(find_in_set))?;
793811
m.add_wrapped(wrap_pyfunction!(sum))?;
794812
m.add_wrapped(wrap_pyfunction!(tan))?;
795813
m.add_wrapped(wrap_pyfunction!(tanh))?;
@@ -798,6 +816,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
798816
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
799817
m.add_wrapped(wrap_pyfunction!(to_timestamp_micros))?;
800818
m.add_wrapped(wrap_pyfunction!(to_timestamp_seconds))?;
819+
m.add_wrapped(wrap_pyfunction!(to_unixtime))?;
801820
m.add_wrapped(wrap_pyfunction!(translate))?;
802821
m.add_wrapped(wrap_pyfunction!(trim))?;
803822
m.add_wrapped(wrap_pyfunction!(trunc))?;

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> {
9292
m.add_class::<config::PyConfig>()?;
9393
m.add_class::<sql::logical::PyLogicalPlan>()?;
9494
m.add_class::<physical_plan::PyExecutionPlan>()?;
95+
m.add_class::<record_batch::PyRecordBatch>()?;
96+
m.add_class::<record_batch::PyRecordBatchStream>()?;
9597

9698
// Register `common` as a submodule, matching `datafusion-common`: https://docs.rs/datafusion-common/latest/datafusion_common/
9799
let common = PyModule::new_bound(py, "common")?;

0 commit comments

Comments
 (0)