Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support Struct field selection in the SQL engine, RENAME and REPLACE select wildcard options #17109

Merged
merged 7 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ pub(super) fn check_expand_literals(

if duplicate_check && !names.insert(name) {
let msg = format!(
"the name: '{}' is duplicate\n\n\
"the name '{}' is duplicate\n\n\
It's possible that multiple expressions are returning the same default column \
name. If this is the case, try renaming the columns with \
`.alias(\"new_name\")` to avoid duplicate column names.",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub(super) use self::rolling_by::RollingFunctionBy;
#[cfg(feature = "strings")]
pub use self::strings::StringFunction;
#[cfg(feature = "dtype-struct")]
pub(crate) use self::struct_::StructFunction;
pub use self::struct_::StructFunction;
#[cfg(feature = "trigonometry")]
pub(super) use self::trigonometry::TrigonometricFunction;
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ fn resolve_with_columns(

if !output_names.insert(field.name().clone()) {
let msg = format!(
"the name: '{}' passed to `LazyFrame.with_columns` is duplicate\n\n\
"the name '{}' passed to `LazyFrame.with_columns` is duplicate\n\n\
It's possible that multiple expressions are returning the same default column name. \
If this is the case, try renaming the columns with `.alias(\"new_name\")` to avoid \
duplicate column names.",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description = "SQL transpiler for Polars. Converts SQL to Polars logical plans"
arrow = { workspace = true }
polars-core = { workspace = true, features = ["rows"] }
polars-error = { workspace = true }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "dtype-struct", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-ops = { workspace = true }
polars-plan = { workspace = true }
polars-time = { workspace = true }
Expand Down
325 changes: 186 additions & 139 deletions crates/polars-sql/src/context.rs

Large diffs are not rendered by default.

117 changes: 82 additions & 35 deletions crates/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,42 +375,9 @@ impl SQLExprVisitor<'_> {

/// Visit a compound SQL identifier
///
/// e.g. df.column or "df"."column"
/// e.g. tbl.column, struct.field, tbl.struct.field (inc. nested struct fields)
fn visit_compound_identifier(&mut self, idents: &[Ident]) -> PolarsResult<Expr> {
match idents {
[tbl_name, column_name] => {
let mut lf = self
.ctx
.get_table_from_current_scope(&tbl_name.value)
.ok_or_else(|| {
polars_err!(
SQLInterface: "no table or alias named '{}' found",
tbl_name
)
})?;

let schema =
lf.schema_with_arenas(&mut self.ctx.lp_arena, &mut self.ctx.expr_arena)?;
if let Some((_, name, _)) = schema.get_full(&column_name.value) {
let resolved = &self.ctx.resolve_name(&tbl_name.value, &column_name.value);
Ok(if name != resolved {
col(resolved).alias(name)
} else {
col(name)
})
} else {
polars_bail!(
SQLInterface: "no column named '{}' found in table '{}'",
column_name,
tbl_name
)
}
},
_ => polars_bail!(
SQLInterface: "invalid identifier {:?}",
idents
),
}
Ok(resolve_compound_identifier(self.ctx, idents, self.active_schema)?[0].clone())
}

fn visit_interval(&self, interval: &Interval) -> PolarsResult<Expr> {
Expand Down Expand Up @@ -1240,3 +1207,83 @@ fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
_ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
}))
}

pub(crate) fn resolve_compound_identifier(
ctx: &mut SQLContext,
idents: &[Ident],
active_schema: Option<&Schema>,
) -> PolarsResult<Vec<Expr>> {
// inference priority: table > struct > column
let ident_root = &idents[0];
let mut remaining_idents = idents.iter().skip(1);
let mut lf = ctx.get_table_from_current_scope(&ident_root.value);

let schema = if let Some(ref mut lf) = lf {
lf.schema_with_arenas(&mut ctx.lp_arena, &mut ctx.expr_arena)
} else {
Ok(Arc::new(if let Some(active_schema) = active_schema {
active_schema.clone()
} else {
Schema::new()
}))
}?;

let col_dtype: PolarsResult<(Expr, Option<&DataType>)> = if lf.is_none() && schema.is_empty() {
Ok((col(&ident_root.value), None))
} else {
let name = &remaining_idents.next().unwrap().value;
if lf.is_some() && name == "*" {
return Ok(schema
.iter_names()
.map(|name| col(name))
.collect::<Vec<_>>());
} else if let Some((_, name, dtype)) = schema.get_full(name) {
let resolved = &ctx.resolve_name(&ident_root.value, name);
Ok((
if name != resolved {
col(resolved).alias(name)
} else {
col(name)
},
Some(dtype),
))
} else if lf.is_none() {
remaining_idents = idents.iter().skip(1);
Ok((col(&ident_root.value), schema.get(&ident_root.value)))
} else {
polars_bail!(
SQLInterface: "no column named '{}' found in table '{}'",
name,
ident_root
)
}
};

// additional ident levels index into struct fields
let (mut column, mut dtype) = col_dtype?;
for ident in remaining_idents {
let name = ident.value.as_str();
match dtype {
Some(DataType::Struct(fields)) if name == "*" => {
return Ok(fields
.iter()
.map(|fld| column.clone().struct_().field_by_name(&fld.name))
.collect())
},
Some(DataType::Struct(fields)) => {
dtype = fields
.iter()
.find(|fld| fld.name == name)
.map(|fld| &fld.dtype);
},
Some(dtype) if name == "*" => {
polars_bail!(SQLSyntax: "cannot expand '*' on non-Struct dtype; found {:?}", dtype)
},
_ => {
dtype = None;
},
}
column = column.struct_().field_by_name(name);
}
Ok(vec![column])
}
Loading