From 7eb8fe0bbbaca95bcb9023e6af35084bcff4091e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:15:00 +0100 Subject: [PATCH 01/16] feat: rename DataFrame.melt to DataFrame.unpivot, and rename its args: id_vars -> index, value_vars -> on --- crates/polars-core/src/frame/explode.rs | 103 +++++++++--------- crates/polars-core/src/prelude.rs | 2 +- crates/polars-lazy/src/frame/mod.rs | 8 +- crates/polars-lazy/src/tests/queries.rs | 10 +- .../src/pipeline/dispatcher/mod.rs | 2 +- crates/polars-plan/src/plans/builder_dsl.rs | 4 +- crates/polars-plan/src/plans/builder_ir.rs | 4 +- crates/polars-plan/src/plans/functions/dsl.rs | 8 +- crates/polars-plan/src/plans/functions/mod.rs | 22 ++-- .../polars-plan/src/plans/functions/schema.rs | 16 +-- .../plans/optimizer/predicate_pushdown/mod.rs | 4 +- .../projection_pushdown/functions/mod.rs | 8 +- .../functions/{melt.rs => unpivot.rs} | 14 +-- .../src/plans/optimizer/slice_pushdown_lp.rs | 2 +- crates/polars/src/docs/eager.rs | 8 +- docs/_build/API_REFERENCE_LINKS.yml | 4 +- .../transformations/{melt.py => unpivot.py} | 6 +- docs/src/rust/Cargo.toml | 4 +- .../transformations/{melt.rs => unpivot.rs} | 6 +- docs/user-guide/concepts/streaming.md | 2 +- docs/user-guide/transformations/index.md | 2 +- docs/user-guide/transformations/melt.md | 21 ---- docs/user-guide/transformations/unpivot.md | 21 ++++ mkdocs.yml | 2 +- .../reference/dataframe/modify_select.rst | 2 +- .../reference/lazyframe/modify_select.rst | 2 +- py-polars/polars/dataframe/frame.py | 38 ++++--- py-polars/polars/lazyframe/frame.py | 34 +++--- py-polars/src/dataframe/general.rs | 14 +-- py-polars/src/lazyframe/mod.rs | 10 +- py-polars/src/lazyframe/visitor/nodes.rs | 11 +- .../tests/unit/operations/test_filter.py | 4 +- .../{test_melt.py => test_unpivot.py} | 42 +++---- py-polars/tests/unit/test_projections.py | 6 +- 34 files changed, 228 insertions(+), 218 deletions(-) rename crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/{melt.rs => unpivot.rs} (85%) rename docs/src/python/user-guide/transformations/{melt.py => unpivot.py} (68%) rename docs/src/rust/user-guide/transformations/{melt.rs => unpivot.rs} (78%) delete mode 100644 docs/user-guide/transformations/melt.md create mode 100644 docs/user-guide/transformations/unpivot.md rename py-polars/tests/unit/operations/{test_melt.py => test_unpivot.py} (57%) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index 7b1055eccd22..047497aa8a11 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -20,15 +20,15 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { } } -/// Arguments for `[DataFrame::melt]` function +/// Arguments for `[DataFrame::unpivot]` function #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))] -pub struct MeltArgs { - pub id_vars: Vec, - pub value_vars: Vec, +pub struct UnpivotArgs { + pub index: Vec, + pub on: Vec, pub variable_name: Option, pub value_name: Option, - /// Whether the melt may be done + /// Whether the unpivot may be done /// in the streaming engine /// This will not have a stable ordering pub streamable: bool, @@ -189,10 +189,10 @@ impl DataFrame { /// /// # Arguments /// - /// * `id_vars` - String slice that represent the columns to use as id variables. 
- /// * `value_vars` - String slice that represent the columns to use as value variables. + /// * `index` - String slice that represent the columns to use as id variables. + /// * `on` - String slice that represent the columns to use as value variables. /// - /// If `value_vars` is empty all columns that are not in `id_vars` will be used. + /// If `on` is empty all columns that are not in `index` will be used. /// /// ```ignore /// # use polars_core::prelude::*; @@ -202,9 +202,9 @@ impl DataFrame { /// "D" => &[2, 4, 6] /// )?; /// - /// let melted = df.melt(&["A", "B"], &["C", "D"])?; + /// let unpivoted = df.unpivot(&["A", "B"], &["C", "D"])?; /// println!("{:?}", df); - /// println!("{:?}", melted); + /// println!("{:?}", unpivoted); /// # Ok::<(), PolarsError>(()) /// ``` /// Outputs: @@ -239,25 +239,25 @@ impl DataFrame { /// | "a" | 5 | "D" | 6 | /// +-----+-----+----------+-------+ /// ``` - pub fn melt(&self, id_vars: I, value_vars: J) -> PolarsResult + pub fn unpivot(&self, index: I, on: J) -> PolarsResult where I: IntoVec, J: IntoVec, { - let id_vars = id_vars.into_vec(); - let value_vars = value_vars.into_vec(); - self.melt2(MeltArgs { - id_vars, - value_vars, + let id_vars = index.into_vec(); + let value_vars = on.into_vec(); + self.unpivot2(UnpivotArgs { + index: id_vars, + on: value_vars, ..Default::default() }) } - /// Similar to melt, but without generics. This may be easier if you want to pass - /// an empty `id_vars` or empty `value_vars`. - pub fn melt2(&self, args: MeltArgs) -> PolarsResult { - let id_vars = args.id_vars; - let mut value_vars = args.value_vars; + /// Similar to unpivot, but without generics. This may be easier if you want to pass + /// an empty `index` or empty `on`. + pub fn unpivot2(&self, args: UnpivotArgs) -> PolarsResult { + let index = args.index; + let mut on = args.on; let variable_name = args.variable_name.as_deref().unwrap_or("variable"); let value_name = args.value_name.as_deref().unwrap_or("value"); @@ -265,25 +265,25 @@ impl DataFrame { let len = self.height(); // if value vars is empty we take all columns that are not in id_vars. 
- if value_vars.is_empty() { + if on.is_empty() { // return empty frame if there are no columns available to use as value vars - if id_vars.len() == self.width() { + if index.len() == self.width() { let variable_col = Series::new_empty(variable_name, &DataType::String); let value_col = Series::new_empty(variable_name, &DataType::Null); - let mut out = self.select(id_vars).unwrap().clear().columns; + let mut out = self.select(index).unwrap().clear().columns; out.push(variable_col); out.push(value_col); return Ok(unsafe { DataFrame::new_no_checks(out) }); } - let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str())); - value_vars = self + let index_set = PlHashSet::from_iter(index.iter().map(|s| s.as_str())); + on = self .get_columns() .iter() .filter_map(|s| { - if id_vars_set.contains(s.name()) { + if index_set.contains(s.name()) { None } else { Some(s.name().into()) @@ -294,7 +294,7 @@ impl DataFrame { // values will all be placed in single column, so we must find their supertype let schema = self.schema(); - let mut iter = value_vars.iter().map(|v| { + let mut iter = on.iter().map(|v| { schema .get(v) .ok_or_else(|| polars_err!(ColumnNotFound: "{}", v)) @@ -304,31 +304,30 @@ impl DataFrame { st = try_get_supertype(&st, dt?)?; } - // The column name of the variable that is melted - let mut variable_col = - MutableBinaryViewArray::::with_capacity(len * value_vars.len() + 1); + // The column name of the variable that is unpivoted + let mut variable_col = MutableBinaryViewArray::::with_capacity(len * on.len() + 1); // prepare ids - let ids_ = self.select_with_schema_unchecked(id_vars, &schema)?; + let ids_ = self.select_with_schema_unchecked(index, &schema)?; let mut ids = ids_.clone(); if ids.width() > 0 { - for _ in 0..value_vars.len() - 1 { + for _ in 0..on.len() - 1 { ids.vstack_mut_unchecked(&ids_) } } ids.as_single_chunk_par(); drop(ids_); - let mut values = Vec::with_capacity(value_vars.len()); + let mut values = Vec::with_capacity(on.len()); - for value_column_name in &value_vars { + for value_column_name in &on { variable_col.extend_constant(len, Some(value_column_name.as_str())); // ensure we go via the schema so we are O(1) // self.column() is linear - // together with this loop that would make it O^2 over value_vars + // together with this loop that would make it O^2 over `on` let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?; let col = &self.columns[pos]; let value_col = col.cast(&st).map_err( - |_| polars_err!(InvalidOperation: "'melt/unpivot' not supported for dtype: {}", col.dtype()), + |_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()), )?; values.extend_from_slice(value_col.chunks()) } @@ -434,7 +433,7 @@ mod test { #[test] #[cfg_attr(miri, ignore)] - fn test_melt() -> PolarsResult<()> { + fn test_unpivot() -> PolarsResult<()> { let df = df!("A" => &["a", "b", "a"], "B" => &[1, 3, 5], "C" => &[10, 11, 12], @@ -442,20 +441,20 @@ mod test { ) .unwrap(); - let melted = df.melt(["A", "B"], ["C", "D"])?; + let unpivoted = df.unpivot(["A", "B"], ["C", "D"])?; assert_eq!( - Vec::from(melted.column("value")?.i32()?), + Vec::from(unpivoted.column("value")?.i32()?), &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] ); - let args = MeltArgs { - id_vars: vec![], - value_vars: vec![], + let args = UnpivotArgs { + index: vec![], + on: vec![], ..Default::default() }; - let melted = df.melt2(args).unwrap(); - let value = melted.column("value")?; + let unpivoted = df.unpivot2(args).unwrap(); + let value = 
unpivoted.column("value")?; // String because of supertype let value = value.str()?; let value = value.into_no_null_iter().collect::>(); @@ -464,22 +463,22 @@ mod test { &["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"] ); - let args = MeltArgs { - id_vars: vec!["A".into()], - value_vars: vec![], + let args = UnpivotArgs { + index: vec!["A".into()], + on: vec![], ..Default::default() }; - let melted = df.melt2(args).unwrap(); - let value = melted.column("value")?; + let unpivoted = df.unpivot2(args).unwrap(); + let value = unpivoted.column("value")?; let value = value.i32()?; let value = value.into_no_null_iter().collect::>(); assert_eq!(value, &[1, 3, 5, 10, 11, 12, 2, 4, 6]); - let variable = melted.column("variable")?; + let variable = unpivoted.column("variable")?; let variable = variable.str()?; let variable = variable.into_no_null_iter().collect::>(); assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]); - assert!(melted.column("A").is_ok()); + assert!(unpivoted.column("A").is_ok()); Ok(()) } } diff --git a/crates/polars-core/src/prelude.rs b/crates/polars-core/src/prelude.rs index eda6704d5c39..934e5ddbd69d 100644 --- a/crates/polars-core/src/prelude.rs +++ b/crates/polars-core/src/prelude.rs @@ -37,7 +37,7 @@ pub use crate::datatypes::{ArrayCollectIterExt, *}; pub use crate::error::{ polars_bail, polars_ensure, polars_err, polars_warn, PolarsError, PolarsResult, }; -pub use crate::frame::explode::MeltArgs; +pub use crate::frame::explode::UnpivotArgs; #[cfg(feature = "algorithm_group_by")] pub(crate) use crate::frame::group_by::aggregations::*; #[cfg(feature = "algorithm_group_by")] diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index e12b7e5566f1..0e617c53d0f4 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1600,12 +1600,12 @@ impl LazyFrame { self.slice(neg_tail, n) } - /// Melt the DataFrame from wide to long format. + /// Unpivot the DataFrame from wide to long format. /// - /// See [`MeltArgs`] for information on how to melt a DataFrame. - pub fn melt(self, args: MeltArgs) -> LazyFrame { + /// See [`UnpivotArgs`] for information on how to unpivot a DataFrame. 
+ pub fn unpivot(self, args: UnpivotArgs) -> LazyFrame { let opt_state = self.get_opt_state(); - let lp = self.get_plan_builder().melt(args).build(); + let lp = self.get_plan_builder().unpivot(args).build(); Self::from_logical_plan(lp, opt_state) } diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index cc49a2c2f32d..690dc6f57ea9 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -46,18 +46,18 @@ fn test_lazy_alias() { } #[test] -fn test_lazy_melt() { +fn test_lazy_unpivot() { let df = get_df(); - let args = MeltArgs { - id_vars: vec!["petal_width".into(), "petal_length".into()], - value_vars: vec!["sepal_length".into(), "sepal_width".into()], + let args = UnpivotArgs { + index: vec!["petal_width".into(), "petal_length".into()], + on: vec!["sepal_length".into(), "sepal_width".into()], ..Default::default() }; let out = df .lazy() - .melt(args) + .unpivot(args) .filter(col("variable").eq(lit("sepal_length"))) .select([col("variable"), col("petal_width"), col("value")]) .collect() diff --git a/crates/polars-pipe/src/pipeline/dispatcher/mod.rs b/crates/polars-pipe/src/pipeline/dispatcher/mod.rs index 901f1fd771cb..216dec376c05 100644 --- a/crates/polars-pipe/src/pipeline/dispatcher/mod.rs +++ b/crates/polars-pipe/src/pipeline/dispatcher/mod.rs @@ -62,7 +62,7 @@ impl ThreadedSink { /// succeed. /// Think for example on multiply a few columns, or applying a predicate. /// Operators can shrink the batches: filter -/// Grow the batches: explode/ melt +/// Grow the batches: explode/ unpivot /// Keep them the same size: element-wise operations /// The probe side of join operations is also an operator. /// diff --git a/crates/polars-plan/src/plans/builder_dsl.rs b/crates/polars-plan/src/plans/builder_dsl.rs index 2e90b494344f..ccebeed92992 100644 --- a/crates/polars-plan/src/plans/builder_dsl.rs +++ b/crates/polars-plan/src/plans/builder_dsl.rs @@ -339,10 +339,10 @@ impl DslBuilder { .into() } - pub fn melt(self, args: MeltArgs) -> Self { + pub fn unpivot(self, args: UnpivotArgs) -> Self { DslPlan::MapFunction { input: Arc::new(self.0), - function: DslFunction::Melt { args }, + function: DslFunction::Unpivot { args }, } .into() } diff --git a/crates/polars-plan/src/plans/builder_ir.rs b/crates/polars-plan/src/plans/builder_ir.rs index bd7f42ab5596..1bab177f41b1 100644 --- a/crates/polars-plan/src/plans/builder_ir.rs +++ b/crates/polars-plan/src/plans/builder_ir.rs @@ -297,10 +297,10 @@ impl<'a> IRBuilder<'a> { self.add_alp(lp) } - pub fn melt(self, args: Arc) -> Self { + pub fn unpivot(self, args: Arc) -> Self { let lp = IR::MapFunction { input: self.root, - function: FunctionNode::Melt { + function: FunctionNode::Unpivot { args, schema: Default::default(), }, diff --git a/crates/polars-plan/src/plans/functions/dsl.rs b/crates/polars-plan/src/plans/functions/dsl.rs index 9ee12e10a35c..6c53e8b676f0 100644 --- a/crates/polars-plan/src/plans/functions/dsl.rs +++ b/crates/polars-plan/src/plans/functions/dsl.rs @@ -9,8 +9,8 @@ pub enum DslFunction { Explode { columns: Vec, }, - Melt { - args: MeltArgs, + Unpivot { + args: UnpivotArgs, }, RowIndex { name: Arc, @@ -79,7 +79,7 @@ impl DslFunction { schema: Default::default(), } }, - DslFunction::Melt { args } => FunctionNode::Melt { + DslFunction::Unpivot { args } => FunctionNode::Unpivot { args: Arc::new(args), schema: Default::default(), }, @@ -125,7 +125,7 @@ impl Display for DslFunction { match self { FunctionNode(inner) => write!(f, "{inner}"), Explode { .. 
} => write!(f, "EXPLODE"), - Melt { .. } => write!(f, "MELT"), + Unpivot { .. } => write!(f, "UNPIVOT"), RowIndex { .. } => write!(f, "WITH ROW INDEX"), Stats(_) => write!(f, "STATS"), FillNan(_) => write!(f, "FILL NAN"), diff --git a/crates/polars-plan/src/plans/functions/mod.rs b/crates/polars-plan/src/plans/functions/mod.rs index 1a24a5c4e852..bdb778541e0a 100644 --- a/crates/polars-plan/src/plans/functions/mod.rs +++ b/crates/polars-plan/src/plans/functions/mod.rs @@ -90,8 +90,8 @@ pub enum FunctionNode { #[cfg_attr(feature = "serde", serde(skip))] schema: CachedSchema, }, - Melt { - args: Arc, + Unpivot { + args: Arc, #[cfg_attr(feature = "serde", serde(skip))] schema: CachedSchema, }, @@ -125,7 +125,7 @@ impl PartialEq for FunctionNode { }, ) => existing_l == existing_r && new_l == new_r, (Explode { columns: l, .. }, Explode { columns: r, .. }) => l == r, - (Melt { args: l, .. }, Melt { args: r, .. }) => l == r, + (Unpivot { args: l, .. }, Unpivot { args: r, .. }) => l == r, (RowIndex { name: l, .. }, RowIndex { name: r, .. }) => l == r, #[cfg(feature = "merge_sorted")] (MergeSorted { column: l }, MergeSorted { column: r }) => l == r, @@ -165,7 +165,7 @@ impl Hash for FunctionNode { new.hash(state); }, FunctionNode::Explode { columns, schema: _ } => columns.hash(state), - FunctionNode::Melt { args, schema: _ } => args.hash(state), + FunctionNode::Unpivot { args, schema: _ } => args.hash(state), FunctionNode::RowIndex { name, schema: _, @@ -187,7 +187,7 @@ impl FunctionNode { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => false, Count { .. } | Unnest { .. } | Rename { .. } | Explode { .. } => true, - Melt { args, .. } => args.streamable, + Unpivot { args, .. } => args.streamable, Opaque { streamable, .. } => *streamable, #[cfg(feature = "python")] OpaquePython { streamable, .. } => *streamable, @@ -201,7 +201,7 @@ impl FunctionNode { match self { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, - Explode { .. } | Melt { .. } => true, + Explode { .. } | Unpivot { .. } => true, _ => false, } } @@ -212,7 +212,7 @@ impl FunctionNode { Opaque { predicate_pd, .. } => *predicate_pd, #[cfg(feature = "python")] OpaquePython { predicate_pd, .. } => *predicate_pd, - Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } | Melt { .. } => true, + Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } | Unpivot { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } | Count { .. } => false, @@ -231,7 +231,7 @@ impl FunctionNode { | Unnest { .. } | Rename { .. } | Explode { .. } - | Melt { .. } => true, + | Unpivot { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } => true, @@ -295,9 +295,9 @@ impl FunctionNode { }, Rename { existing, new, .. } => rename::rename_impl(df, existing, new), Explode { columns, .. } => df.explode(columns.as_ref()), - Melt { args, .. } => { + Unpivot { args, .. } => { let args = (**args).clone(); - df.melt2(args) + df.unpivot2(args) }, RowIndex { name, offset, .. } => df.with_row_index(name.as_ref(), *offset), } @@ -353,7 +353,7 @@ impl Display for FunctionNode { }, Rename { .. } => write!(f, "RENAME"), Explode { .. } => write!(f, "EXPLODE"), - Melt { .. } => write!(f, "MELT"), + Unpivot { .. } => write!(f, "UNPIVOT"), RowIndex { .. 
} => write!(f, "WITH ROW INDEX"), } } diff --git a/crates/polars-plan/src/plans/functions/schema.rs b/crates/polars-plan/src/plans/functions/schema.rs index 532cdd9f4168..fbb83c137733 100644 --- a/crates/polars-plan/src/plans/functions/schema.rs +++ b/crates/polars-plan/src/plans/functions/schema.rs @@ -11,7 +11,7 @@ impl FunctionNode { RowIndex { schema, .. } | Explode { schema, .. } | Rename { schema, .. } - | Melt { schema, .. } => { + | Unpivot { schema, .. } => { let mut guard = schema.lock().unwrap(); *guard = None; }, @@ -98,7 +98,7 @@ impl FunctionNode { Ok(Cow::Owned(row_index_schema(schema, input_schema, name))) }, Explode { schema, columns } => explode_schema(schema, input_schema, columns), - Melt { schema, args } => melt_schema(args, schema, input_schema), + Unpivot { schema, args } => unpivot_schema(args, schema, input_schema), } } } @@ -143,8 +143,8 @@ fn explode_schema<'a>( Ok(Cow::Owned(schema)) } -fn melt_schema<'a>( - args: &MeltArgs, +fn unpivot_schema<'a>( + args: &UnpivotArgs, cached_schema: &CachedSchema, input_schema: &'a Schema, ) -> PolarsResult> { @@ -154,7 +154,7 @@ fn melt_schema<'a>( } let mut new_schema = args - .id_vars + .index .iter() .map(|id| Field::new(id, input_schema.get(id).unwrap().clone())) .collect::(); @@ -175,15 +175,15 @@ fn melt_schema<'a>( let mut supertype = DataType::Null; // take all columns that are not in `id_vars` as `value_var` - if args.value_vars.is_empty() { - let id_vars = PlHashSet::from_iter(&args.id_vars); + if args.on.is_empty() { + let id_vars = PlHashSet::from_iter(&args.index); for (name, dtype) in input_schema.iter() { if !id_vars.contains(name) { supertype = try_get_supertype(&supertype, dtype).unwrap(); } } } else { - for name in &args.value_vars { + for name in &args.on { let dtype = input_schema.get(name).unwrap(); supertype = try_get_supertype(&supertype, dtype).unwrap(); } diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index c1c29d3ec310..6d07ff86ddf7 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -524,7 +524,7 @@ impl<'a> PredicatePushDown<'a> { expr_arena, )) }, - FunctionNode::Melt { args, .. } => { + FunctionNode::Unpivot { args, .. } => { let variable_name = args.variable_name.as_deref().unwrap_or("variable"); let value_name = args.value_name.as_deref().unwrap_or("value"); @@ -533,7 +533,7 @@ impl<'a> PredicatePushDown<'a> { let name = &*name; name == variable_name || name == value_name - || args.value_vars.iter().any(|s| s.as_str() == name) + || args.on.iter().any(|s| s.as_str() == name) }; let local_predicates = transfer_to_local_by_name( expr_arena, diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs index 12d236635a4b..0a9c80827b9e 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs @@ -1,6 +1,6 @@ -mod melt; +mod unpivot; -use melt::process_melt; +use unpivot::process_unpivot; use super::*; @@ -64,13 +64,13 @@ pub(super) fn process_functions( .explode(columns.clone()) .build()) }, - Melt { ref args, .. } => { + Unpivot { ref args, .. 
} => { let lp = IR::MapFunction { input, function: function.clone(), }; - process_melt( + process_unpivot( proj_pd, lp, args, diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs similarity index 85% rename from crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs rename to crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs index 8c63331ae704..70704f76fa9b 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs @@ -1,17 +1,17 @@ use super::*; #[allow(clippy::too_many_arguments)] -pub(super) fn process_melt( +pub(super) fn process_unpivot( proj_pd: &mut ProjectionPushDown, lp: IR, - args: &Arc, + args: &Arc, input: Node, acc_projections: Vec, projections_seen: usize, lp_arena: &mut Arena, expr_arena: &mut Arena, ) -> PolarsResult { - if args.value_vars.is_empty() { + if args.on.is_empty() { // restart projection pushdown proj_pd.no_pushdown_restart_opt(lp, acc_projections, projections_seen, lp_arena, expr_arena) } else { @@ -28,10 +28,10 @@ pub(super) fn process_melt( } // make sure that the requested columns are projected - args.id_vars.iter().for_each(|name| { + args.index.iter().for_each(|name| { add_str_to_accumulated(name, &mut acc_projections, &mut projected_names, expr_arena) }); - args.value_vars.iter().for_each(|name| { + args.on.iter().for_each(|name| { add_str_to_accumulated(name, &mut acc_projections, &mut projected_names, expr_arena) }); @@ -44,9 +44,9 @@ pub(super) fn process_melt( expr_arena, )?; - // re-make melt node so that the schema is updated + // re-make unpivot node so that the schema is updated let lp = IRBuilder::new(input, expr_arena, lp_arena) - .melt(args.clone()) + .unpivot(args.clone()) .build(); if local_projections.is_empty() { diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index c88302703d05..b804e50692ea 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -349,7 +349,7 @@ impl SlicePushDown { | m @ (DataFrameScan {..}, _) | m @ (Sort {..}, _) | m @ (MapFunction {function: FunctionNode::Explode {..}, ..}, _) - | m @ (MapFunction {function: FunctionNode::Melt {..}, ..}, _) + | m @ (MapFunction {function: FunctionNode::Unpivot {..}, ..}, _) | m @ (Cache {..}, _) | m @ (Distinct {..}, _) | m @ (GroupBy{..},_) diff --git a/crates/polars/src/docs/eager.rs b/crates/polars/src/docs/eager.rs index a62872e8059d..28e12057bde8 100644 --- a/crates/polars/src/docs/eager.rs +++ b/crates/polars/src/docs/eager.rs @@ -24,7 +24,7 @@ //! * [Joins](#joins) //! * [GroupBy](#group_by) //! - [pivot](#pivot) -//! * [Melt](#melt) +//! * [Unpivot](#unpivot) //! * [Explode](#explode) //! * [IO](#io) //! - [Read CSV](#read-csv) @@ -460,7 +460,7 @@ //! # } //! ``` //! -//! ## Melt +//! ## Unpivot //! //! ``` //! use polars::prelude::*; @@ -473,8 +473,8 @@ //! "D" => &[2, 4, 6] //! ]?; //! -//! let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap(); -//! // melted: +//! let unpivoted = df.unpivot(&["A", "B"], &["C", "D"]).unwrap(); +//! // unpivoted: //! //! // +-----+-----+----------+-------+ //! 
// | A | B | variable | value | diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml index 02d8fd143ff3..aa3e28f68d31 100644 --- a/docs/_build/API_REFERENCE_LINKS.yml +++ b/docs/_build/API_REFERENCE_LINKS.yml @@ -98,7 +98,7 @@ python: join_asof: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html concat: https://docs.pola.rs/api/python/stable/reference/api/polars.concat.html pivot: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.pivot.html - melt: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.melt.html + unpivot: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.unpivot.html is_between: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.is_between.html date_range: https://docs.pola.rs/api/python/stable/reference/api/polars.date_range.html @@ -177,7 +177,7 @@ rust: DataFrame.explode: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.explode pivot: https://docs.pola.rs/api/rust/dev/polars_lazy/frame/pivot/fn.pivot.html - melt: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.melt + unpivot: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.unpivot upsample: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.upsample join_asof: https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoin.html#method.join_asof unnest: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.unnest diff --git a/docs/src/python/user-guide/transformations/melt.py b/docs/src/python/user-guide/transformations/unpivot.py similarity index 68% rename from docs/src/python/user-guide/transformations/melt.py rename to docs/src/python/user-guide/transformations/unpivot.py index e9bf53a96ec7..03efe884baa0 100644 --- a/docs/src/python/user-guide/transformations/melt.py +++ b/docs/src/python/user-guide/transformations/unpivot.py @@ -12,7 +12,7 @@ print(df) # --8<-- [end:df] -# --8<-- [start:melt] -out = df.melt(id_vars=["A", "B"], value_vars=["C", "D"]) +# --8<-- [start:unpivot] +out = df.unpivot(index=["A", "B"], on=["C", "D"]) print(out) -# --8<-- [end:melt] +# --8<-- [end:unpivot] diff --git a/docs/src/rust/Cargo.toml b/docs/src/rust/Cargo.toml index fa41c7eed87a..3f40edd72d42 100644 --- a/docs/src/rust/Cargo.toml +++ b/docs/src/rust/Cargo.toml @@ -132,8 +132,8 @@ name = "user-guide-transformations-joins" path = "user-guide/transformations/joins.rs" required-features = ["polars/lazy"] [[bin]] -name = "user-guide-transformations-melt" -path = "user-guide/transformations/melt.rs" +name = "user-guide-transformations-unpivot" +path = "user-guide/transformations/unpivot.rs" [[bin]] name = "user-guide-transformations-pivot" path = "user-guide/transformations/pivot.rs" diff --git a/docs/src/rust/user-guide/transformations/melt.rs b/docs/src/rust/user-guide/transformations/unpivot.rs similarity index 78% rename from docs/src/rust/user-guide/transformations/melt.rs rename to docs/src/rust/user-guide/transformations/unpivot.rs index ff797423d293..a094d7364e7d 100644 --- a/docs/src/rust/user-guide/transformations/melt.rs +++ b/docs/src/rust/user-guide/transformations/unpivot.rs @@ -13,9 +13,9 @@ fn main() -> Result<(), Box> { println!("{}", &df); // --8<-- [end:df] - // --8<-- [start:melt] - let out = df.melt(["A", "B"], ["C", "D"])?; + // --8<-- [start:unpivot] + let out = df.unpivot(["A", "B"], ["C", "D"])?; 
println!("{}", &out); - // --8<-- [end:melt] + // --8<-- [end:unpivot] Ok(()) } diff --git a/docs/user-guide/concepts/streaming.md b/docs/user-guide/concepts/streaming.md index 0365e944f47e..0dbafec6ec7b 100644 --- a/docs/user-guide/concepts/streaming.md +++ b/docs/user-guide/concepts/streaming.md @@ -18,7 +18,7 @@ Streaming is supported for many operations including: - `join` - `unique` - `sort` -- `explode`,`melt` +- `explode`,`unpivot` - `scan_csv`,`scan_parquet`,`scan_ipc` This list is not exhaustive. Polars is in active development, and more operations can be added without explicit notice. diff --git a/docs/user-guide/transformations/index.md b/docs/user-guide/transformations/index.md index cd673786643c..3092c5be3c37 100644 --- a/docs/user-guide/transformations/index.md +++ b/docs/user-guide/transformations/index.md @@ -5,4 +5,4 @@ The focus of this section is to describe different types of data transformations - [Joins](joins.md) - [Concatenation](concatenation.md) - [Pivot](pivot.md) -- [Melt](melt.md) +- [Unpivot](unpivot.md) diff --git a/docs/user-guide/transformations/melt.md b/docs/user-guide/transformations/melt.md deleted file mode 100644 index 3e6efe35723e..000000000000 --- a/docs/user-guide/transformations/melt.md +++ /dev/null @@ -1,21 +0,0 @@ -# Melts - -Melt operations unpivot a DataFrame from wide format to long format - -## Dataset - -{{code_block('user-guide/transformations/melt','df',['DataFrame'])}} - -```python exec="on" result="text" session="user-guide/transformations/melt" ---8<-- "python/user-guide/transformations/melt.py:df" -``` - -## Eager + lazy - -`Eager` and `lazy` have the same API. - -{{code_block('user-guide/transformations/melt','melt',['melt'])}} - -```python exec="on" result="text" session="user-guide/transformations/melt" ---8<-- "python/user-guide/transformations/melt.py:melt" -``` diff --git a/docs/user-guide/transformations/unpivot.md b/docs/user-guide/transformations/unpivot.md new file mode 100644 index 000000000000..83715a001dc8 --- /dev/null +++ b/docs/user-guide/transformations/unpivot.md @@ -0,0 +1,21 @@ +# Unpivots + +Unpivot unpivots a DataFrame from wide format to long format + +## Dataset + +{{code_block('user-guide/transformations/unpivot','df',['DataFrame'])}} + +```python exec="on" result="text" session="user-guide/transformations/unpivot" +--8<-- "python/user-guide/transformations/unpivot.py:df" +``` + +## Eager + lazy + +`Eager` and `lazy` have the same API. 
+ +{{code_block('user-guide/transformations/unpivot','unpivot',['unpivot'])}} + +```python exec="on" result="text" session="user-guide/transformations/unpivot" +--8<-- "python/user-guide/transformations/unpivot.py:unpivot" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 47fa8d9a216c..d2e3c1e637fa 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,7 +43,7 @@ nav: - user-guide/transformations/joins.md - user-guide/transformations/concatenation.md - user-guide/transformations/pivot.md - - user-guide/transformations/melt.md + - user-guide/transformations/unpivot.md - Time series: - user-guide/transformations/time-series/parsing.md - user-guide/transformations/time-series/filter.md diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index 26cb474f7c63..da497de12daf 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -35,7 +35,6 @@ Manipulation/selection DataFrame.join DataFrame.join_asof DataFrame.limit - DataFrame.melt DataFrame.merge_sorted DataFrame.partition_by DataFrame.pipe @@ -64,6 +63,7 @@ Manipulation/selection DataFrame.transpose DataFrame.unique DataFrame.unnest + DataFrame.unpivot DataFrame.unstack DataFrame.update DataFrame.upsample diff --git a/py-polars/docs/source/reference/lazyframe/modify_select.rst b/py-polars/docs/source/reference/lazyframe/modify_select.rst index 41c83b2c7a20..dfe772923c95 100644 --- a/py-polars/docs/source/reference/lazyframe/modify_select.rst +++ b/py-polars/docs/source/reference/lazyframe/modify_select.rst @@ -28,7 +28,6 @@ Manipulation/selection LazyFrame.join_asof LazyFrame.last LazyFrame.limit - LazyFrame.melt LazyFrame.merge_sorted LazyFrame.rename LazyFrame.reverse @@ -44,6 +43,7 @@ Manipulation/selection LazyFrame.top_k LazyFrame.unique LazyFrame.unnest + LazyFrame.unpivot LazyFrame.update LazyFrame.with_columns LazyFrame.with_columns_seq diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index ec0ce54249ba..17e5c9403822 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7630,6 +7630,10 @@ def pivot( ------- DataFrame + Notes + ----- + In some other frameworks, you might know this operation as `pivot_wider`. + Examples -------- >>> df = pl.DataFrame( @@ -7798,10 +7802,10 @@ def pivot( ) ) - def melt( + def unpivot( self, - id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, - value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, ) -> Self: @@ -7811,22 +7815,28 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (id_vars) while all other columns, considered - measured variables (value_vars), are "unpivoted" to the row axis leaving just + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. Parameters ---------- - id_vars + index Column(s) or selector(s) to use as identifier variables. 
- value_vars - Column(s) or selector(s) to use as values variables; if `value_vars` - is empty all columns that are not in `id_vars` will be used. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name Name to give to the `value` column. Defaults to "value" + Notes + ----- + If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, + but with `index` replacing `id_vars` and `on` replacing `value_vars`. + In other frameworks, you might know this operation as `pivot_longer`. + Examples -------- >>> df = pl.DataFrame( @@ -7837,7 +7847,7 @@ def melt( ... } ... ) >>> import polars.selectors as cs - >>> df.melt(id_vars="a", value_vars=cs.numeric()) + >>> df.unpivot(index="a", on=cs.numeric()) shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ @@ -7852,12 +7862,10 @@ def melt( │ z ┆ c ┆ 6 │ └─────┴──────────┴───────┘ """ - value_vars = [] if value_vars is None else _expand_selectors(self, value_vars) - id_vars = [] if id_vars is None else _expand_selectors(self, id_vars) + on = [] if on is None else _expand_selectors(self, on) + index = [] if index is None else _expand_selectors(self, index) - return self._from_pydf( - self._df.melt(id_vars, value_vars, value_name, variable_name) - ) + return self._from_pydf(self._df.unpivot(index, on, value_name, variable_name)) @unstable() def unstack( diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 698884360910..69d58afeedb1 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5703,10 +5703,10 @@ def drop_nulls( subset = _expand_selectors(self, subset) return self._from_pyldf(self._ldf.drop_nulls(subset)) - def melt( + def unpivot( self, - id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, - value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, *, @@ -5718,26 +5718,32 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (id_vars) while all other columns, considered - measured variables (value_vars), are "unpivoted" to the row axis leaving just + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. Parameters ---------- - id_vars + index Column(s) or selector(s) to use as identifier variables. - value_vars - Column(s) or selector(s) to use as values variables; if `value_vars` - is empty all columns that are not in `id_vars` will be used. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name Name to give to the `value` column. Defaults to "value" streamable Allow this node to run in the streaming engine. - If this runs in streaming, the output of the melt operation + If this runs in streaming, the output of the unpivot operation will not have a stable ordering. 
+ Notes + ----- + If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, + but with `index` replacing `id_vars` and `on` replacing `value_vars`. + In other frameworks, you might know this operation as `pivot_longer`. + Examples -------- >>> lf = pl.LazyFrame( @@ -5748,7 +5754,7 @@ def melt( ... } ... ) >>> import polars.selectors as cs - >>> lf.melt(id_vars="a", value_vars=cs.numeric()).collect() + >>> lf.unpivot(index="a", on=cs.numeric()).collect() shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ @@ -5763,11 +5769,11 @@ def melt( │ z ┆ c ┆ 6 │ └─────┴──────────┴───────┘ """ - value_vars = [] if value_vars is None else _expand_selectors(self, value_vars) - id_vars = [] if id_vars is None else _expand_selectors(self, id_vars) + on = [] if on is None else _expand_selectors(self, on) + index = [] if index is None else _expand_selectors(self, index) return self._from_pyldf( - self._ldf.melt(id_vars, value_vars, value_name, variable_name, streamable) + self._ldf.unpivot(index, on, value_name, variable_name, streamable) ) def map_batches( diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index fef05d9350ff..340f5d1837ba 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -397,22 +397,22 @@ impl PyDataFrame { PyDataFrame::new(self.df.clone()) } - pub fn melt( + pub fn unpivot( &self, - id_vars: Vec, - value_vars: Vec, + index: Vec, + on: Vec, value_name: Option<&str>, variable_name: Option<&str>, ) -> PyResult { - let args = MeltArgs { - id_vars: strings_to_smartstrings(id_vars), - value_vars: strings_to_smartstrings(value_vars), + let args = UnpivotArgs { + index: strings_to_smartstrings(index), + on: strings_to_smartstrings(on), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable: false, }; - let df = self.df.melt2(args).map_err(PyPolarsErr::from)?; + let df = self.df.unpivot2(args).map_err(PyPolarsErr::from)?; Ok(PyDataFrame::new(df)) } diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 9de61cd2b022..022114db4e42 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -1112,7 +1112,7 @@ impl PyLazyFrame { } #[pyo3(signature = (id_vars, value_vars, value_name, variable_name, streamable))] - fn melt( + fn unpivot( &self, id_vars: Vec, value_vars: Vec, @@ -1120,16 +1120,16 @@ impl PyLazyFrame { variable_name: Option, streamable: bool, ) -> Self { - let args = MeltArgs { - id_vars: strings_to_smartstrings(id_vars), - value_vars: strings_to_smartstrings(value_vars), + let args = UnpivotArgs { + index: strings_to_smartstrings(id_vars), + on: strings_to_smartstrings(value_vars), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable, }; let ldf = self.ldf.clone(); - ldf.melt(args).into() + ldf.unpivot(args).into() } fn with_row_index(&self, name: &str, offset: Option) -> Self { diff --git a/py-polars/src/lazyframe/visitor/nodes.rs b/py-polars/src/lazyframe/visitor/nodes.rs index 5c0832a07d0f..c206e59254d3 100644 --- a/py-polars/src/lazyframe/visitor/nodes.rs +++ b/py-polars/src/lazyframe/visitor/nodes.rs @@ -520,13 +520,10 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { columns.iter().map(|s| s.to_string()).collect::>(), ) .to_object(py), - FunctionNode::Melt { args, schema: _ } => ( - "melt", - args.id_vars.iter().map(|s| s.as_str()).collect::>(), - args.value_vars - .iter() - .map(|s| s.as_str()) - .collect::>(), + 
FunctionNode::Unpivot { args, schema: _ } => ( + "unpivot", + args.index.iter().map(|s| s.as_str()).collect::>(), + args.on.iter().map(|s| s.as_str()).collect::>(), args.variable_name .as_ref() .map_or_else(|| py.None(), |s| s.as_str().to_object(py)), diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py index 61bb23fd231f..85db19b228aa 100644 --- a/py-polars/tests/unit/operations/test_filter.py +++ b/py-polars/tests/unit/operations/test_filter.py @@ -29,7 +29,7 @@ def test_filter_contains_nth_11205() -> None: assert df.filter(pl.first()).is_empty() -def test_melt_values_predicate_pushdown() -> None: +def test_unpivot_values_predicate_pushdown() -> None: lf = pl.DataFrame( { "id": [1], @@ -40,7 +40,7 @@ def test_melt_values_predicate_pushdown() -> None: ).lazy() assert ( - lf.melt("id", ["asset_key_1", "asset_key_2", "asset_key_3"]) + lf.unpivot("id", ["asset_key_1", "asset_key_2", "asset_key_3"]) .filter(pl.col("value") == pl.lit("123")) .collect() ).to_dict(as_series=False) == { diff --git a/py-polars/tests/unit/operations/test_melt.py b/py-polars/tests/unit/operations/test_unpivot.py similarity index 57% rename from py-polars/tests/unit/operations/test_melt.py rename to py-polars/tests/unit/operations/test_unpivot.py index 4c36581e11b7..174928b2e9fc 100644 --- a/py-polars/tests/unit/operations/test_melt.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -5,22 +5,22 @@ from polars.testing import assert_frame_equal -def test_melt() -> None: +def test_unpivot() -> None: df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())): - melted_eager = df.melt(id_vars="A", value_vars=["B", "C"]) - assert all(melted_eager["value"] == [1, 3, 5, 2, 4, 6]) + unpivoted_eager = df.unpivot(index="A", on=["B", "C"]) + assert all(unpivoted_eager["value"] == [1, 3, 5, 2, 4, 6]) - melted_lazy = df.lazy().melt(id_vars="A", value_vars=["B", "C"]) - assert all(melted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) + unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"]) + assert all(unpivoted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) - melted = df.melt(id_vars="A", value_vars="B") - assert all(melted["value"] == [1, 3, 5]) + unpivoted = df.unpivot(index="A", on="B") + assert all(unpivoted["value"] == [1, 3, 5]) n = 3 - for melted in [df.melt(), df.lazy().melt().collect()]: - assert melted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert melted["value"].to_list() == [ + for unpivoted in [df.unpivot(), df.lazy().unpivot().collect()]: + assert unpivoted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n + assert unpivoted["value"].to_list() == [ "a", "b", "c", @@ -32,12 +32,12 @@ def test_melt() -> None: "6", ] - for melted in [ - df.melt(value_name="foo", variable_name="bar"), - df.lazy().melt(value_name="foo", variable_name="bar").collect(), + for unpivoted in [ + df.unpivot(value_name="foo", variable_name="bar"), + df.lazy().unpivot(value_name="foo", variable_name="bar").collect(), ]: - assert melted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert melted["foo"].to_list() == [ + assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n + assert unpivoted["foo"].to_list() == [ "a", "b", "c", @@ -50,7 +50,7 @@ def test_melt() -> None: ] -def test_melt_projection_pd_7747() -> None: +def test_unpivot_projection_pd_7747() -> None: df = pl.LazyFrame( { "number": [1, 2, 1, 2, 1], @@ -60,7 +60,7 @@ def 
test_melt_projection_pd_7747() -> None: ) result = ( df.with_columns(pl.col("age").alias("wgt")) - .melt(id_vars="number", value_vars="wgt") + .unpivot(index="number", on="wgt") .select("number", "value") .collect() ) @@ -74,10 +74,10 @@ def test_melt_projection_pd_7747() -> None: # https://github.com/pola-rs/polars/issues/10075 -def test_melt_no_value_vars() -> None: +def test_unpivot_no_value_vars() -> None: lf = pl.LazyFrame({"a": [1, 2, 3]}) - result = lf.melt("a") + result = lf.unpivot("a") expected = pl.LazyFrame( schema={"a": pl.Int64, "variable": pl.String, "value": pl.Null} @@ -85,8 +85,8 @@ def test_melt_no_value_vars() -> None: assert_frame_equal(result, expected) -def test_melt_raise_list() -> None: +def test_unpivot_raise_list() -> None: with pytest.raises(pl.exceptions.InvalidOperationError): pl.LazyFrame( {"a": ["x", "y"], "b": [["test", "test2"], ["test3", "test4"]]} - ).melt().collect() + ).unpivot().collect() diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index 96c3b5a33dad..802061265baa 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -23,12 +23,12 @@ def test_projection_on_semi_join_4789() -> None: assert q.collect().to_dict(as_series=False) == {"a": [1], "p": [1], "seq": [[1]]} -def test_melt_projection_pd_block_4997() -> None: +def test_unpivot_projection_pd_block_4997() -> None: assert ( pl.DataFrame({"col1": ["a"], "col2": ["b"]}) .with_row_index() .lazy() - .melt(id_vars="index") + .unpivot(index="index") .group_by("index") .agg(pl.col("variable").alias("result")) .collect() @@ -69,7 +69,7 @@ def test_unnest_projection_pushdown() -> None: lf = pl.DataFrame({"x|y|z": [1, 2], "a|b|c": [2, 3]}).lazy() mlf = ( - lf.melt() + lf.unpivot() .with_columns(pl.col("variable").str.split_exact("|", 2)) .unnest("variable") ) From d680a87bca8aa15736ea30fad84d2a4c22439458 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 07:58:35 +0100 Subject: [PATCH 02/16] wip --- .../reference/dataframe/modify_select.rst | 1 + .../reference/lazyframe/modify_select.rst | 1 + py-polars/polars/dataframe/frame.py | 38 ++++++++++++++++ py-polars/polars/lazyframe/frame.py | 43 +++++++++++++++++++ .../tests/unit/operations/test_unpivot.py | 13 +++--- 5 files changed, 90 insertions(+), 6 deletions(-) diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index da497de12daf..11042e70c7bd 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -35,6 +35,7 @@ Manipulation/selection DataFrame.join DataFrame.join_asof DataFrame.limit + DataFrame.melt DataFrame.merge_sorted DataFrame.partition_by DataFrame.pipe diff --git a/py-polars/docs/source/reference/lazyframe/modify_select.rst b/py-polars/docs/source/reference/lazyframe/modify_select.rst index dfe772923c95..925591ed8649 100644 --- a/py-polars/docs/source/reference/lazyframe/modify_select.rst +++ b/py-polars/docs/source/reference/lazyframe/modify_select.rst @@ -28,6 +28,7 @@ Manipulation/selection LazyFrame.join_asof LazyFrame.last LazyFrame.limit + LazyFrame.melt LazyFrame.merge_sorted LazyFrame.rename LazyFrame.reverse diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 17e5c9403822..d55a1085198d 100644 --- a/py-polars/polars/dataframe/frame.py +++ 
b/py-polars/polars/dataframe/frame.py @@ -10675,6 +10675,44 @@ def count(self) -> DataFrame: """ return self.lazy().count().collect(_eager=True) + @deprecate_function( + "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vaars`", version="1.0.0" + ) + def melt( + self, + id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + variable_name: str | None = None, + value_name: str | None = None, + ) -> Self: + """ + Unpivot a DataFrame from wide to long format. + + Optionally leaves identifiers set. + + This function is useful to massage a DataFrame into a format where one or more + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just + two non-identifier columns, 'variable' and 'value'. + + .. deprecated 1.0.0 + Please use :meth:`.unpivot` instead. + + Parameters + ---------- + index + Column(s) or selector(s) to use as identifier variables. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. + variable_name + Name to give to the `variable` column. Defaults to "variable" + value_name + Name to give to the `value` column. Defaults to "value" + """ + return self.unpivot(index=id_vars, on=value_vars, variable_name=variable_name, value_name=value_name) + + def _prepare_other_arg(other: Any, length: int | None = None) -> Series: # if not a series create singleton series such that it will broadcast diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 69d58afeedb1..97a27603794b 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -6306,3 +6306,46 @@ def count(self) -> Self: └─────┴─────┴─────┘ """ return self._from_pyldf(self._ldf.count()) + + @deprecate_function( + "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vaars`", version="1.0.0" + ) + def melt( + self, + id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + variable_name: str | None = None, + value_name: str | None = None, + *, + streamable: bool = True, + ) -> Self: + """ + Unpivot a DataFrame from wide to long format. + + Optionally leaves identifiers set. + + This function is useful to massage a DataFrame into a format where one or more + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just + two non-identifier columns, 'variable' and 'value'. + + .. deprecated 1.0.0 + Please use :meth:`.unpivot` instead. + + Parameters + ---------- + index + Column(s) or selector(s) to use as identifier variables. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. + variable_name + Name to give to the `variable` column. Defaults to "variable" + value_name + Name to give to the `value` column. Defaults to "value" + streamable + Allow this node to run in the streaming engine. + If this runs in streaming, the output of the unpivot operation + will not have a stable ordering. 
+ """ + return self.unpivot(index=id_vars, on=value_vars, variable_name=variable_name, value_name=value_name, streamable=streamable) diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py index 174928b2e9fc..dda5f212ca1c 100644 --- a/py-polars/tests/unit/operations/test_unpivot.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -58,12 +58,13 @@ def test_unpivot_projection_pd_7747() -> None: "weight": [100, 103, 95, 90, 110], } ) - result = ( - df.with_columns(pl.col("age").alias("wgt")) - .unpivot(index="number", on="wgt") - .select("number", "value") - .collect() - ) + with pytest.deprecated_call(): + result = ( + df.with_columns(pl.col("age").alias("wgt")) + .melt(id_vars="number", value_vars="wgt") + .select("number", "value") + .collect() + ) expected = pl.DataFrame( { "number": [1, 2, 1, 2, 1], From dc7d1a4c968e9b49c001aeb3eba917f2b198e627 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 08:49:03 +0100 Subject: [PATCH 03/16] columns -> on in pivot --- crates/polars-lazy/src/frame/pivot.rs | 24 ++----- crates/polars-ops/src/frame/pivot/mod.rs | 28 ++++---- docs/releases/upgrade/1.md | 2 +- docs/src/python/user-guide/lazy/schema.py | 2 +- .../user-guide/transformations/pivot.py | 4 +- py-polars/polars/dataframe/frame.py | 36 ++++++----- py-polars/polars/lazyframe/frame.py | 15 +++-- py-polars/polars/selectors.py | 2 +- py-polars/src/dataframe/general.rs | 6 +- py-polars/tests/unit/operations/test_pivot.py | 64 +++++++++---------- .../tests/unit/operations/test_unpivot.py | 35 +++++----- 11 files changed, 106 insertions(+), 112 deletions(-) diff --git a/crates/polars-lazy/src/frame/pivot.rs b/crates/polars-lazy/src/frame/pivot.rs index e440b6e22bc6..8c73c61201cb 100644 --- a/crates/polars-lazy/src/frame/pivot.rs +++ b/crates/polars-lazy/src/frame/pivot.rs @@ -33,7 +33,7 @@ impl PhysicalAggExpr for PivotExpr { pub fn pivot( df: &DataFrame, index: I0, - columns: I1, + on: I1, values: Option, sort_columns: bool, agg_expr: Option, @@ -53,21 +53,13 @@ where let expr = prepare_eval_expr(agg_expr); PivotAgg::Expr(Arc::new(PivotExpr(expr))) }); - polars_ops::pivot::pivot( - df, - index, - columns, - values, - sort_columns, - agg_expr, - separator, - ) + polars_ops::pivot::pivot(df, index, on, values, sort_columns, agg_expr, separator) } pub fn pivot_stable( df: &DataFrame, index: I0, - columns: I1, + on: I1, values: Option, sort_columns: bool, agg_expr: Option, @@ -87,13 +79,5 @@ where let expr = prepare_eval_expr(agg_expr); PivotAgg::Expr(Arc::new(PivotExpr(expr))) }); - polars_ops::pivot::pivot_stable( - df, - index, - columns, - values, - sort_columns, - agg_expr, - separator, - ) + polars_ops::pivot::pivot_stable(df, index, on, values, sort_columns, agg_expr, separator) } diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index 6b045dc418bc..ef53e17905d1 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -85,7 +85,7 @@ fn restore_logical_type(s: &Series, logical_type: &DataType) -> Series { pub fn pivot( pivot_df: &DataFrame, index: I0, - columns: I1, + on: I1, values: Option, sort_columns: bool, agg_fn: Option, @@ -103,7 +103,7 @@ where .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); - let columns = columns + let columns = on .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); @@ -128,7 +128,7 @@ where pub fn pivot_stable( pivot_df: 
&DataFrame, index: I0, - columns: I1, + on: I1, values: Option, sort_columns: bool, agg_fn: Option, @@ -146,15 +146,15 @@ where .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); - let columns = columns + let on = on .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); - let values = get_values_columns(pivot_df, &index, &columns, values); + let values = get_values_columns(pivot_df, &index, &on, values); pivot_impl( pivot_df, &index, - &columns, + &on, &values, agg_fn, sort_columns, @@ -170,7 +170,7 @@ where fn get_values_columns( df: &DataFrame, index: &[String], - columns: &[String], + on: &[String], values: Option, ) -> Vec where @@ -183,7 +183,7 @@ where .get_column_names() .into_iter() .map(|c| c.to_string()) - .filter(|c| !(index.contains(c) | columns.contains(c))) + .filter(|c| !(index.contains(c) | on.contains(c))) .collect(), } } @@ -195,7 +195,7 @@ fn pivot_impl( index: &[String], // these columns will be used for a nested group_by // the rows of this nested group_by will be pivoted as header column values - columns: &[String], + on: &[String], // these columns will be aggregated in the nested group_by values: &[String], // aggregation function @@ -206,15 +206,15 @@ fn pivot_impl( separator: Option<&str>, ) -> PolarsResult { polars_ensure!(!index.is_empty(), ComputeError: "index cannot be zero length"); - polars_ensure!(!columns.is_empty(), ComputeError: "columns cannot be zero length"); + polars_ensure!(!on.is_empty(), ComputeError: "columns cannot be zero length"); if !stable { println!("unstable pivot not yet supported, using stable pivot"); }; - if columns.len() > 1 { + if on.len() > 1 { let schema = Arc::new(pivot_df.schema()); - let binding = pivot_df.select_with_schema(columns, &schema)?; + let binding = pivot_df.select_with_schema(on, &schema)?; let fields = binding.get_columns(); - let column = format!("{{\"{}\"}}", columns.join("\",\"")); + let column = format!("{{\"{}\"}}", on.join("\",\"")); if schema.contains(column.as_str()) { polars_bail!(ComputeError: "cannot use column name {column} that \ already exists in the DataFrame. Please rename it prior to calling `pivot`.") @@ -235,7 +235,7 @@ fn pivot_impl( pivot_impl_single_column( pivot_df, index, - unsafe { columns.get_unchecked(0) }, + unsafe { on.get_unchecked(0) }, values, agg_fn, sort_columns, diff --git a/docs/releases/upgrade/1.md b/docs/releases/upgrade/1.md index b4210eea33e2..0e5664e64e4e 100644 --- a/docs/releases/upgrade/1.md +++ b/docs/releases/upgrade/1.md @@ -393,7 +393,7 @@ After: ... "test_2": [100, 100, 60, 60], ... } ... 
) ->>> df.pivot(index='name', columns='subject', values=['test_1', 'test_2']) +>>> df.pivot(index='name', on='subject', values=['test_1', 'test_2']) ┌───────┬──────────────┬────────────────┬──────────────┬────────────────┐ │ name ┆ test_1_maths ┆ test_1_physics ┆ test_2_maths ┆ test_2_physics │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ diff --git a/docs/src/python/user-guide/lazy/schema.py b/docs/src/python/user-guide/lazy/schema.py index 1c2cef1447be..3c5cbd40c1ab 100644 --- a/docs/src/python/user-guide/lazy/schema.py +++ b/docs/src/python/user-guide/lazy/schema.py @@ -34,7 +34,7 @@ .with_columns((2 * pl.col("values")).alias("double_values")) .collect() .pivot( - index="id", columns="month", values="double_values", aggregate_function="first" + index="id", on="month", values="double_values", aggregate_function="first" ) .lazy() .filter(pl.col("mar").is_null()) diff --git a/docs/src/python/user-guide/transformations/pivot.py b/docs/src/python/user-guide/transformations/pivot.py index d80b26ee0c34..a51db55bbdf4 100644 --- a/docs/src/python/user-guide/transformations/pivot.py +++ b/docs/src/python/user-guide/transformations/pivot.py @@ -15,7 +15,7 @@ # --8<-- [end:df] # --8<-- [start:eager] -out = df.pivot(index="foo", columns="bar", values="N", aggregate_function="first") +out = df.pivot(index="foo", on="bar", values="N", aggregate_function="first") print(out) # --8<-- [end:eager] @@ -23,7 +23,7 @@ q = ( df.lazy() .collect() - .pivot(index="foo", columns="bar", values="N", aggregate_function="first") + .pivot(index="foo", on="bar", values="N", aggregate_function="first") .lazy() ) out = q.collect() diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index d55a1085198d..6485f371bb5e 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7586,10 +7586,11 @@ def explode( """ return self.lazy().explode(columns, *more_columns).collect(_eager=True) + @deprecate_renamed_parameter("columns", "on", version="1.0.0") def pivot( self, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, - columns: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, values: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, aggregate_function: PivotAgg | Expr | None = None, *, @@ -7609,7 +7610,7 @@ def pivot( Column values to aggregate. If None, all remaining columns will be used. index One or multiple keys to group by. - columns + on Name of the column(s) whose values will be used as the header of the output DataFrame. aggregate_function @@ -7643,7 +7644,7 @@ def pivot( ... "baz": [1, 2, 3, 4, 5, 6], ... } ... ) - >>> df.pivot(index="foo", columns="bar", values="baz", aggregate_function="sum") + >>> df.pivot(index="foo", on="bar", values="baz", aggregate_function="sum") shape: (2, 3) ┌─────┬─────┬─────┐ │ foo ┆ y ┆ x │ @@ -7659,7 +7660,7 @@ def pivot( >>> import polars.selectors as cs >>> df.pivot( ... index=cs.string(), - ... columns=cs.string(), + ... on=cs.string(), ... values=cs.numeric(), ... aggregate_function="sum", ... sort_columns=True, @@ -7689,7 +7690,7 @@ def pivot( ... ) >>> df.pivot( ... index="col1", - ... columns="col2", + ... on="col2", ... values="col3", ... aggregate_function=pl.element().tanh().mean(), ... 
) @@ -7708,12 +7709,12 @@ def pivot( get the same result as above in lazy mode: >>> index = pl.col("col1") - >>> columns = pl.col("col2") + >>> on = pl.col("col2") >>> values = pl.col("col3") >>> unique_column_values = ["x", "y"] >>> aggregate_function = lambda col: col.tanh().mean() >>> df.lazy().group_by(index).agg( - ... aggregate_function(values.filter(columns == value)).alias(value) + ... aggregate_function(values.filter(on == value)).alias(value) ... for value in unique_column_values ... ).collect() # doctest: +IGNORE_RESULT shape: (2, 3) @@ -7738,7 +7739,7 @@ def pivot( ... ) >>> df.pivot( ... index="ix", - ... columns="col", + ... on="col", ... values=["foo", "bar"], ... aggregate_function="sum", ... separator="/", @@ -7754,7 +7755,7 @@ def pivot( └─────┴───────┴───────┴───────┴───────┘ """ # noqa: W505 index = _expand_selectors(self, index) - columns = _expand_selectors(self, columns) + on = _expand_selectors(self, on) if values is not None: values = _expand_selectors(self, values) @@ -7793,7 +7794,7 @@ def pivot( return self._from_pydf( self._df.pivot_expr( index, - columns, + on, values, maintain_order, sort_columns, @@ -10676,7 +10677,8 @@ def count(self) -> DataFrame: return self.lazy().count().collect(_eager=True) @deprecate_function( - "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vaars`", version="1.0.0" + "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vars`", + version="1.0.0", ) def melt( self, @@ -10700,9 +10702,9 @@ def melt( Parameters ---------- - index + id_vars Column(s) or selector(s) to use as identifier variables. - on + value_vars Column(s) or selector(s) to use as values variables; if `on` is empty all columns that are not in `index` will be used. variable_name @@ -10710,8 +10712,12 @@ def melt( value_name Name to give to the `value` column. Defaults to "value" """ - return self.unpivot(index=id_vars, on=value_vars, variable_name=variable_name, value_name=value_name) - + return self.unpivot( + index=id_vars, + on=value_vars, + variable_name=variable_name, + value_name=value_name, + ) def _prepare_other_arg(other: Any, length: int | None = None) -> Series: diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 97a27603794b..1ffeb79c9413 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -6308,7 +6308,8 @@ def count(self) -> Self: return self._from_pyldf(self._ldf.count()) @deprecate_function( - "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vaars`", version="1.0.0" + "Use `unpivot` instead, with `index` instead of `id_vars` and `on` instead of `value_vars`", + version="1.0.0", ) def melt( self, @@ -6334,9 +6335,9 @@ def melt( Parameters ---------- - index + id_vars Column(s) or selector(s) to use as identifier variables. - on + value_vars Column(s) or selector(s) to use as values variables; if `on` is empty all columns that are not in `index` will be used. variable_name @@ -6348,4 +6349,10 @@ def melt( If this runs in streaming, the output of the unpivot operation will not have a stable ordering. 
""" - return self.unpivot(index=id_vars, on=value_vars, variable_name=variable_name, value_name=value_name, streamable=streamable) + return self.unpivot( + index=id_vars, + on=value_vars, + variable_name=variable_name, + value_name=value_name, + streamable=streamable, + ) diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index bf22336a273c..628c4d9fddb0 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -1567,7 +1567,7 @@ def digit(ascii_only: bool = False) -> SelectorType: # noqa: FBT001 ... ).pivot( ... values="value", ... index="key", - ... columns="year", + ... on="year", ... aggregate_function="sum", ... ) >>> print(df) diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index 340f5d1837ba..71031c9aabc7 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -417,11 +417,11 @@ impl PyDataFrame { } #[cfg(feature = "pivot")] - #[pyo3(signature = (index, columns, values, maintain_order, sort_columns, aggregate_expr, separator))] + #[pyo3(signature = (index, on, values, maintain_order, sort_columns, aggregate_expr, separator))] pub fn pivot_expr( &self, index: Vec, - columns: Vec, + on: Vec, values: Option>, maintain_order: bool, sort_columns: bool, @@ -433,7 +433,7 @@ impl PyDataFrame { let df = fun( &self.df, index, - columns, + on, values, sort_columns, agg_expr, diff --git a/py-polars/tests/unit/operations/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py index b45d158fd704..ca92aeb48434 100644 --- a/py-polars/tests/unit/operations/test_pivot.py +++ b/py-polars/tests/unit/operations/test_pivot.py @@ -22,7 +22,7 @@ def test_pivot() -> None: "N": [1, 2, 2, 4, 2], } ) - result = df.pivot(index="foo", columns="bar", values="N", aggregate_function=None) + result = df.pivot(index="foo", on="bar", values="N", aggregate_function=None) expected = pl.DataFrame( [ @@ -45,7 +45,7 @@ def test_pivot_no_values() -> None: "N2": [1, 2, 2, 4, 2], } ) - result = df.pivot(index="foo", columns="bar", values=None, aggregate_function=None) + result = df.pivot(index="foo", on="bar", values=None, aggregate_function=None) expected = pl.DataFrame( { "foo": ["A", "B", "C"], @@ -78,7 +78,7 @@ def test_pivot_list() -> None: ) out = df.pivot( index="a", - columns="a", + on="a", values="b", aggregate_function="first", sort_columns=True, @@ -107,7 +107,7 @@ def test_pivot_aggregate(agg_fn: PivotAgg, expected_rows: list[tuple[Any]]) -> N } ) result = df.pivot( - index="b", columns="a", values="c", aggregate_function=agg_fn, sort_columns=True + index="b", on="a", values="c", aggregate_function=agg_fn, sort_columns=True ) assert result.rows() == expected_rows @@ -140,12 +140,12 @@ def test_pivot_categorical_index() -> None: schema=[("A", pl.Categorical), ("B", pl.Categorical)], ) - result = df.pivot(index=["A"], columns="B", values="B", aggregate_function="len") + result = df.pivot(index=["A"], on="B", values="B", aggregate_function="len") expected = {"A": ["Fire", "Water"], "Car": [1, 2], "Ship": [1, None]} assert result.to_dict(as_series=False) == expected # test expression dispatch - result = df.pivot(index=["A"], columns="B", values="B", aggregate_function=pl.len()) + result = df.pivot(index=["A"], on="B", values="B", aggregate_function=pl.len()) assert result.to_dict(as_series=False) == expected df = pl.DataFrame( @@ -156,9 +156,7 @@ def test_pivot_categorical_index() -> None: }, schema=[("A", pl.Categorical), ("B", pl.Categorical), ("C", pl.Categorical)], ) - result = df.pivot( - 
index=["A", "C"], columns="B", values="B", aggregate_function="len" - ) + result = df.pivot(index=["A", "C"], on="B", values="B", aggregate_function="len") expected = { "A": ["Fire", "Water"], "C": ["Paper", "Paper"], @@ -181,7 +179,7 @@ def test_pivot_multiple_values_column_names_5116() -> None: with pytest.raises(ComputeError, match="found multiple elements in the same group"): df.pivot( index="c1", - columns="c2", + on="c2", values=["x1", "x2"], separator="|", aggregate_function=None, @@ -189,7 +187,7 @@ def test_pivot_multiple_values_column_names_5116() -> None: result = df.pivot( index="c1", - columns="c2", + on="c2", values=["x1", "x2"], separator="|", aggregate_function="first", @@ -216,7 +214,7 @@ def test_pivot_duplicate_names_7731() -> None: ) result = df.pivot( index=cs.float(), - columns=cs.string(), + on=cs.string(), values=cs.integer(), aggregate_function="first", ).to_dict(as_series=False) @@ -232,9 +230,7 @@ def test_pivot_duplicate_names_7731() -> None: def test_pivot_duplicate_names_11663() -> None: df = pl.DataFrame({"a": [1, 2], "b": [1, 2], "c": ["x", "x"], "d": ["x", "y"]}) - result = df.pivot(index="b", columns=["c", "d"], values="a").to_dict( - as_series=False - ) + result = df.pivot(index="b", on=["c", "d"], values="a").to_dict(as_series=False) expected = {"b": [1, 2], '{"x","x"}': [1, None], '{"x","y"}': [None, 2]} assert result == expected @@ -250,7 +246,7 @@ def test_pivot_multiple_columns_12407() -> None: } ) result = df.pivot( - index="b", columns=["c", "e"], values=["a"], aggregate_function="len" + index="b", on=["c", "e"], values=["a"], aggregate_function="len" ).to_dict(as_series=False) expected = {"b": ["a", "b"], '{"s","x"}': [1, None], '{"f","y"}': [None, 1]} assert result == expected @@ -268,7 +264,7 @@ def test_pivot_struct_13120() -> None: df = df.with_columns(pl.struct(["item_type", "item_id"]).alias("columns")).drop( "item_type", "item_id" ) - result = df.pivot(index="index", columns="columns", values="values").to_dict( + result = df.pivot(index="index", on="columns", values="values").to_dict( as_series=False ) expected = {"index": [1, 2, 3], '{"a",123}': [4, 5, 6], '{"b",456}': [7, 8, 9]} @@ -284,7 +280,7 @@ def test_pivot_index_struct_14101() -> None: "d": [1, 1, 3], } ) - result = df.pivot(index="b", columns="c", values="a") + result = df.pivot(index="b", on="c", values="a") expected = pl.DataFrame({"b": [{"a": 1}, {"a": 2}], "x": [1, None], "y": [2, 1]}) assert_frame_equal(result, expected) @@ -302,7 +298,7 @@ def test_pivot_name_already_exists() -> None: df.pivot( values='{"a","b"}', index="a", - columns=["a", "b"], + on=["a", "b"], aggregate_function="first", ) @@ -319,11 +315,11 @@ def test_pivot_floats() -> None: with pytest.raises(ComputeError, match="found multiple elements in the same group"): result = df.pivot( - index="weight", columns="quantity", values="price", aggregate_function=None + index="weight", on="quantity", values="price", aggregate_function=None ) result = df.pivot( - index="weight", columns="quantity", values="price", aggregate_function="first" + index="weight", on="quantity", values="price", aggregate_function="first" ) expected = { "weight": [1.0, 4.4, 8.8], @@ -335,7 +331,7 @@ def test_pivot_floats() -> None: result = df.pivot( index=["article", "weight"], - columns="quantity", + on="quantity", values="price", aggregate_function=None, ) @@ -359,7 +355,7 @@ def test_pivot_reinterpret_5907() -> None: ) result = df.pivot( - index=["A"], columns=["B"], values=["C"], aggregate_function=pl.element().sum() + index=["A"], 
on=["B"], values=["C"], aggregate_function=pl.element().sum() ) expected = {"A": [3, -2], "x": [100, 50], "y": [500, -80]} assert result.to_dict(as_series=False) == expected @@ -376,7 +372,7 @@ def test_pivot_temporal_logical_types() -> None: } ) assert df.pivot( - index="idx", columns="foo", values="value", aggregate_function=None + index="idx", on="foo", values="value", aggregate_function=None ).to_dict(as_series=False) == { "idx": [ datetime(1977, 1, 1, 0, 0), @@ -401,7 +397,7 @@ def test_pivot_negative_duration() -> None: pl.Series(name="value", values=range(len(df1) * len(df2))) ) assert df.pivot( - index="delta", columns="root", values="value", aggregate_function=None + index="delta", on="root", values="value", aggregate_function=None ).to_dict(as_series=False) == { "delta": [ timedelta(days=-2), @@ -417,7 +413,7 @@ def test_pivot_negative_duration() -> None: def test_aggregate_function_default() -> None: df = pl.DataFrame({"a": [1, 2], "b": ["foo", "foo"], "c": ["x", "x"]}) with pytest.raises(ComputeError, match="found multiple elements in the same group"): - df.pivot(index="b", columns="c", values="a") + df.pivot(index="b", on="c", values="a") def test_pivot_aggregate_function_count_deprecated() -> None: @@ -429,7 +425,7 @@ def test_pivot_aggregate_function_count_deprecated() -> None: } ) with pytest.deprecated_call(): - df.pivot(index="foo", columns="bar", values="N", aggregate_function="count") # type: ignore[arg-type] + df.pivot(index="foo", on="bar", values="N", aggregate_function="count") # type: ignore[arg-type] def test_pivot_struct() -> None: @@ -442,7 +438,7 @@ def test_pivot_struct() -> None: df = pl.DataFrame(data).with_columns(nums=pl.struct(["num1", "num2"])) assert df.pivot( - values="nums", index="id", columns="week", aggregate_function="first" + values="nums", index="id", on="week", aggregate_function="first" ).to_dict(as_series=False) == { "id": ["a", "b", "c"], "1": [ @@ -471,7 +467,7 @@ def test_pivot_struct() -> None: def test_duplicate_column_names_which_should_raise_14305() -> None: df = pl.DataFrame({"a": [1, 3, 2], "c": ["a", "a", "a"], "d": [7, 8, 9]}) with pytest.raises(DuplicateError, match="has more than one occurrences"): - df.pivot(index="a", columns="c", values="d") + df.pivot(index="a", on="c", values="d") def test_multi_index_containing_struct() -> None: @@ -483,7 +479,7 @@ def test_multi_index_containing_struct() -> None: "d": [1, 1, 3], } ) - result = df.pivot(index=("b", "d"), columns="c", values="a") + result = df.pivot(index=("b", "d"), on="c", values="a") expected = pl.DataFrame( {"b": [{"a": 1}, {"a": 2}], "d": [1, 3], "x": [1, None], "y": [2, 1]} ) @@ -501,7 +497,7 @@ def test_list_pivot() -> None: ) assert df.pivot( index=["a", "b"], - columns="c", + on="c", values="d", ).to_dict(as_series=False) == { "a": [1, 2, 3], @@ -519,9 +515,9 @@ def test_pivot_string_17081() -> None: "c": ["7", "8", "9"], } ) - assert df.pivot( - index="a", columns="b", values="c", aggregate_function="min" - ).to_dict(as_series=False) == { + assert df.pivot(index="a", on="b", values="c", aggregate_function="min").to_dict( + as_series=False + ) == { "a": ["1", "2", "3"], "4": ["7", None, None], "5": [None, "8", None], diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py index dda5f212ca1c..9dc758d1eb98 100644 --- a/py-polars/tests/unit/operations/test_unpivot.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -32,22 +32,23 @@ def test_unpivot() -> None: "6", ] - for unpivoted in [ - 
df.unpivot(value_name="foo", variable_name="bar"), - df.lazy().unpivot(value_name="foo", variable_name="bar").collect(), - ]: - assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert unpivoted["foo"].to_list() == [ - "a", - "b", - "c", - "1", - "3", - "5", - "2", - "4", - "6", - ] + with pytest.deprecated_call(match="unpivot"): + for unpivoted in [ + df.melt(value_name="foo", variable_name="bar"), + df.lazy().melt(value_name="foo", variable_name="bar").collect(), + ]: + assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n + assert unpivoted["foo"].to_list() == [ + "a", + "b", + "c", + "1", + "3", + "5", + "2", + "4", + "6", + ] def test_unpivot_projection_pd_7747() -> None: @@ -58,7 +59,7 @@ def test_unpivot_projection_pd_7747() -> None: "weight": [100, 103, 95, 90, 110], } ) - with pytest.deprecated_call(): + with pytest.deprecated_call(match="unpivot"): result = ( df.with_columns(pl.col("age").alias("wgt")) .melt(id_vars="number", value_vars="wgt") From bc6603eb1efd1b205fc9dc513f3a818b97cc0e92 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 08:52:55 +0100 Subject: [PATCH 04/16] fixup --- docs/src/python/user-guide/lazy/schema.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/src/python/user-guide/lazy/schema.py b/docs/src/python/user-guide/lazy/schema.py index 3c5cbd40c1ab..1b8971b84597 100644 --- a/docs/src/python/user-guide/lazy/schema.py +++ b/docs/src/python/user-guide/lazy/schema.py @@ -33,9 +33,7 @@ ) .with_columns((2 * pl.col("values")).alias("double_values")) .collect() - .pivot( - index="id", on="month", values="double_values", aggregate_function="first" - ) + .pivot(index="id", on="month", values="double_values", aggregate_function="first") .lazy() .filter(pl.col("mar").is_null()) .collect() From 965452f000c855364de6ffedc51a9849489484c6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 08:54:44 +0100 Subject: [PATCH 05/16] columns -> on --- crates/polars-ops/src/frame/pivot/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index ef53e17905d1..f51fbad0590b 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -103,15 +103,15 @@ where .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); - let columns = on + let on = on .into_iter() .map(|s| s.as_ref().to_string()) .collect::>(); - let values = get_values_columns(pivot_df, &index, &columns, values); + let values = get_values_columns(pivot_df, &index, &on, values); pivot_impl( pivot_df, &index, - &columns, + &on, &values, agg_fn, sort_columns, From 6cce611e2d59b52baec16e751c1112bc414d4e1c Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 08:57:13 +0100 Subject: [PATCH 06/16] update err msg --- crates/polars-ops/src/frame/pivot/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index f51fbad0590b..688524662037 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -206,7 +206,7 @@ fn pivot_impl( separator: Option<&str>, ) -> PolarsResult { polars_ensure!(!index.is_empty(), ComputeError: "index cannot be zero length"); - 
polars_ensure!(!on.is_empty(), ComputeError: "columns cannot be zero length"); + polars_ensure!(!on.is_empty(), ComputeError: "`on` cannot be zero length"); if !stable { println!("unstable pivot not yet supported, using stable pivot"); }; From de183ecfd8b8eac886cbd6070dd964f0d3926213 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:08:00 +0100 Subject: [PATCH 07/16] one more arg name in docs --- py-polars/polars/dataframe/frame.py | 8 ++++---- py-polars/polars/lazyframe/frame.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 6485f371bb5e..c665c0071a14 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -10693,8 +10693,8 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (index) while all other columns, considered - measured variables (on), are "unpivoted" to the row axis leaving just + columns are identifier variables (id_vars) while all other columns, considered + measured variables (value_vars), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. .. deprecated 1.0.0 @@ -10705,8 +10705,8 @@ def melt( id_vars Column(s) or selector(s) to use as identifier variables. value_vars - Column(s) or selector(s) to use as values variables; if `on` - is empty all columns that are not in `index` will be used. + Column(s) or selector(s) to use as values variables; if `value_vars` + is empty all columns that are not in `id_vars` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 1ffeb79c9413..c5154061eace 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -6326,8 +6326,8 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (index) while all other columns, considered - measured variables (on), are "unpivoted" to the row axis leaving just + columns are identifier variables (id_vars) while all other columns, considered + measured variables (value_vars), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. .. deprecated 1.0.0 @@ -6338,8 +6338,8 @@ def melt( id_vars Column(s) or selector(s) to use as identifier variables. value_vars - Column(s) or selector(s) to use as values variables; if `on` - is empty all columns that are not in `index` will be used. + Column(s) or selector(s) to use as values variables; if `value_vars` + is empty all columns that are not in `id_vars` will be used. variable_name Name to give to the `variable` column. 
Defaults to "variable" value_name From 0219f0bcc08d5209c9c5afe59c0adaea95edbfe9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 12:05:21 +0100 Subject: [PATCH 08/16] change order --- py-polars/polars/dataframe/frame.py | 8 ++++---- py-polars/polars/lazyframe/frame.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c665c0071a14..8d06119f3a74 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7589,8 +7589,8 @@ def explode( @deprecate_renamed_parameter("columns", "on", version="1.0.0") def pivot( self, - index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, values: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, aggregate_function: PivotAgg | Expr | None = None, *, @@ -7805,8 +7805,8 @@ def pivot( def unpivot( self, - index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, ) -> Self: @@ -7822,11 +7822,11 @@ def unpivot( Parameters ---------- - index - Column(s) or selector(s) to use as identifier variables. on Column(s) or selector(s) to use as values variables; if `on` is empty all columns that are not in `index` will be used. + index + Column(s) or selector(s) to use as identifier variables. variable_name Name to give to the `variable` column. Defaults to "variable" value_name diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index c5154061eace..b0bcd0d50dcc 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5705,8 +5705,8 @@ def drop_nulls( def unpivot( self, - index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, *, @@ -5724,11 +5724,11 @@ def unpivot( Parameters ---------- - index - Column(s) or selector(s) to use as identifier variables. on Column(s) or selector(s) to use as values variables; if `on` is empty all columns that are not in `index` will be used. + index + Column(s) or selector(s) to use as identifier variables. variable_name Name to give to the `variable` column. 
Defaults to "variable" value_name From 9de041c86f394ac016f783787af435d618989ff2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:13:44 +0100 Subject: [PATCH 09/16] enforce keyword-only --- py-polars/polars/dataframe/frame.py | 3 ++- py-polars/polars/lazyframe/frame.py | 2 +- py-polars/tests/unit/operations/test_filter.py | 2 +- py-polars/tests/unit/operations/test_unpivot.py | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 8d06119f3a74..f18fd1ab23f0 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7589,11 +7589,11 @@ def explode( @deprecate_renamed_parameter("columns", "on", version="1.0.0") def pivot( self, + *, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, values: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, aggregate_function: PivotAgg | Expr | None = None, - *, maintain_order: bool = True, sort_columns: bool = False, separator: str = "_", @@ -7805,6 +7805,7 @@ def pivot( def unpivot( self, + *, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index b0bcd0d50dcc..5e0728a937cd 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5705,11 +5705,11 @@ def drop_nulls( def unpivot( self, + *, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, - *, streamable: bool = True, ) -> Self: """ diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py index 85db19b228aa..d1c0de20a04b 100644 --- a/py-polars/tests/unit/operations/test_filter.py +++ b/py-polars/tests/unit/operations/test_filter.py @@ -40,7 +40,7 @@ def test_unpivot_values_predicate_pushdown() -> None: ).lazy() assert ( - lf.unpivot("id", ["asset_key_1", "asset_key_2", "asset_key_3"]) + lf.unpivot(index="id", on=["asset_key_1", "asset_key_2", "asset_key_3"]) .filter(pl.col("value") == pl.lit("123")) .collect() ).to_dict(as_series=False) == { diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py index 9dc758d1eb98..a4155da56874 100644 --- a/py-polars/tests/unit/operations/test_unpivot.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -76,10 +76,10 @@ def test_unpivot_projection_pd_7747() -> None: # https://github.com/pola-rs/polars/issues/10075 -def test_unpivot_no_value_vars() -> None: +def test_unpivot_no_on() -> None: lf = pl.LazyFrame({"a": [1, 2, 3]}) - result = lf.unpivot("a") + result = lf.unpivot(index="a") expected = pl.LazyFrame( schema={"a": pl.Int64, "variable": pl.String, "value": pl.Null} From 3ba6382dece2ad161c213fb2f19dcd4fb05cf670 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:02:10 +0100 Subject: [PATCH 10/16] on positional, values and index keyword --- crates/polars-core/src/frame/explode.rs | 8 ++++---- crates/polars-lazy/src/frame/pivot.rs | 6 +++--- crates/polars-ops/src/frame/pivot/mod.rs 
| 8 ++++---- py-polars/polars/dataframe/frame.py | 22 +++++++++++----------- py-polars/polars/lazyframe/frame.py | 2 +- py-polars/src/dataframe/general.rs | 6 +++--- py-polars/src/lazyframe/mod.rs | 10 +++++----- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index 047497aa8a11..0f0d7a86d282 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -244,11 +244,11 @@ impl DataFrame { I: IntoVec, J: IntoVec, { - let id_vars = index.into_vec(); - let value_vars = on.into_vec(); + let index = index.into_vec(); + let on = on.into_vec(); self.unpivot2(UnpivotArgs { - index: id_vars, - on: value_vars, + index, + on, ..Default::default() }) } diff --git a/crates/polars-lazy/src/frame/pivot.rs b/crates/polars-lazy/src/frame/pivot.rs index 8c73c61201cb..f6fc6851739e 100644 --- a/crates/polars-lazy/src/frame/pivot.rs +++ b/crates/polars-lazy/src/frame/pivot.rs @@ -32,8 +32,8 @@ impl PhysicalAggExpr for PivotExpr { pub fn pivot( df: &DataFrame, - index: I0, on: I1, + index: I0, values: Option, sort_columns: bool, agg_expr: Option, @@ -58,8 +58,8 @@ where pub fn pivot_stable( df: &DataFrame, - index: I0, - on: I1, + on: I0, + index: I1, values: Option, sort_columns: bool, agg_expr: Option, diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index 688524662037..194f3a2c5d69 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -84,8 +84,8 @@ fn restore_logical_type(s: &Series, logical_type: &DataType) -> Series { /// If you have a relatively large table, consider using a group_by over a pivot. pub fn pivot( pivot_df: &DataFrame, - index: I0, - on: I1, + on: I0, + index: I1, values: Option, sort_columns: bool, agg_fn: Option, @@ -127,8 +127,8 @@ where /// If you have a relatively large table, consider using a group_by over a pivot. pub fn pivot_stable( pivot_df: &DataFrame, - index: I0, - on: I1, + on: I0, + index: I1, values: Option, sort_columns: bool, agg_fn: Option, diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index f18fd1ab23f0..9cc63ceb9590 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7589,9 +7589,9 @@ def explode( @deprecate_renamed_parameter("columns", "on", version="1.0.0") def pivot( self, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector], *, - on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, - index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector], values: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None, aggregate_function: PivotAgg | Expr | None = None, maintain_order: bool = True, @@ -7606,13 +7606,13 @@ def pivot( Parameters ---------- + on + Name of the column(s) whose values will be used as the header of the output + DataFrame. values Column values to aggregate. If None, all remaining columns will be used. index One or multiple keys to group by. - on - Name of the column(s) whose values will be used as the header of the output - DataFrame. aggregate_function Choose from: @@ -7659,8 +7659,8 @@ def pivot( >>> import polars.selectors as cs >>> df.pivot( + ... cs.string(), ... index=cs.string(), - ... on=cs.string(), ... values=cs.numeric(), ... aggregate_function="sum", ... sort_columns=True, @@ -7689,8 +7689,8 @@ def pivot( ... } ... 
) >>> df.pivot( + ... "col2", ... index="col1", - ... on="col2", ... values="col3", ... aggregate_function=pl.element().tanh().mean(), ... ) @@ -7738,8 +7738,8 @@ def pivot( ... } ... ) >>> df.pivot( + ... "col", ... index="ix", - ... on="col", ... values=["foo", "bar"], ... aggregate_function="sum", ... separator="/", @@ -7793,8 +7793,8 @@ def pivot( return self._from_pydf( self._df.pivot_expr( - index, on, + index, values, maintain_order, sort_columns, @@ -7805,8 +7805,8 @@ def pivot( def unpivot( self, - *, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + *, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, @@ -7867,7 +7867,7 @@ def unpivot( on = [] if on is None else _expand_selectors(self, on) index = [] if index is None else _expand_selectors(self, index) - return self._from_pydf(self._df.unpivot(index, on, value_name, variable_name)) + return self._from_pydf(self._df.unpivot(on, index, value_name, variable_name)) @unstable() def unstack( diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 5e0728a937cd..fd7d0fb6e4be 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5773,7 +5773,7 @@ def unpivot( index = [] if index is None else _expand_selectors(self, index) return self._from_pyldf( - self._ldf.unpivot(index, on, value_name, variable_name, streamable) + self._ldf.unpivot(on, index, value_name, variable_name, streamable) ) def map_batches( diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index 71031c9aabc7..421afe0c5995 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -399,8 +399,8 @@ impl PyDataFrame { pub fn unpivot( &self, - index: Vec, on: Vec, + index: Vec, value_name: Option<&str>, variable_name: Option<&str>, ) -> PyResult { @@ -417,11 +417,11 @@ impl PyDataFrame { } #[cfg(feature = "pivot")] - #[pyo3(signature = (index, on, values, maintain_order, sort_columns, aggregate_expr, separator))] + #[pyo3(signature = (on, index, values, maintain_order, sort_columns, aggregate_expr, separator))] pub fn pivot_expr( &self, - index: Vec, on: Vec, + index: Vec, values: Option>, maintain_order: bool, sort_columns: bool, diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 022114db4e42..277a2c1980a7 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -1111,18 +1111,18 @@ impl PyLazyFrame { ldf.tail(n).into() } - #[pyo3(signature = (id_vars, value_vars, value_name, variable_name, streamable))] + #[pyo3(signature = (on, index, value_name, variable_name, streamable))] fn unpivot( &self, - id_vars: Vec, - value_vars: Vec, + on: Vec, + index: Vec, value_name: Option, variable_name: Option, streamable: bool, ) -> Self { let args = UnpivotArgs { - index: strings_to_smartstrings(id_vars), - on: strings_to_smartstrings(value_vars), + index: strings_to_smartstrings(index), + on: strings_to_smartstrings(on), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable, From 131bce9eb0ebb370a4becbf5da64214c17592880 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:08:44 +0100 Subject: [PATCH 11/16] fixup rust side --- crates/polars-core/src/frame/explode.rs | 4 ++-- crates/polars-lazy/src/frame/pivot.rs | 8 ++++---- crates/polars/tests/it/core/pivot.rs | 22 
+++++++++++----------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index 0f0d7a86d282..c7ed14eca344 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -189,8 +189,8 @@ impl DataFrame { /// /// # Arguments /// - /// * `index` - String slice that represent the columns to use as id variables. /// * `on` - String slice that represent the columns to use as value variables. + /// * `index` - String slice that represent the columns to use as id variables. /// /// If `on` is empty all columns that are not in `index` will be used. /// @@ -441,7 +441,7 @@ mod test { ) .unwrap(); - let unpivoted = df.unpivot(["A", "B"], ["C", "D"])?; + let unpivoted = df.unpivot(["C", "D"], ["A", "B"])?; assert_eq!( Vec::from(unpivoted.column("value")?.i32()?), &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] diff --git a/crates/polars-lazy/src/frame/pivot.rs b/crates/polars-lazy/src/frame/pivot.rs index f6fc6851739e..a3b2acab4bd8 100644 --- a/crates/polars-lazy/src/frame/pivot.rs +++ b/crates/polars-lazy/src/frame/pivot.rs @@ -32,8 +32,8 @@ impl PhysicalAggExpr for PivotExpr { pub fn pivot( df: &DataFrame, - on: I1, - index: I0, + on: I0, + index: I1, values: Option, sort_columns: bool, agg_expr: Option, @@ -53,7 +53,7 @@ where let expr = prepare_eval_expr(agg_expr); PivotAgg::Expr(Arc::new(PivotExpr(expr))) }); - polars_ops::pivot::pivot(df, index, on, values, sort_columns, agg_expr, separator) + polars_ops::pivot::pivot(df, on, index, values, sort_columns, agg_expr, separator) } pub fn pivot_stable( @@ -79,5 +79,5 @@ where let expr = prepare_eval_expr(agg_expr); PivotAgg::Expr(Arc::new(PivotExpr(expr))) }); - polars_ops::pivot::pivot_stable(df, index, on, values, sort_columns, agg_expr, separator) + polars_ops::pivot::pivot_stable(df, on, index, values, sort_columns, agg_expr, separator) } diff --git a/crates/polars/tests/it/core/pivot.rs b/crates/polars/tests/it/core/pivot.rs index 6f9c996b44cc..fc0e8481aa8f 100644 --- a/crates/polars/tests/it/core/pivot.rs +++ b/crates/polars/tests/it/core/pivot.rs @@ -15,8 +15,8 @@ fn test_pivot_date_() -> PolarsResult<()> { // Test with date as the `columns` input let out = pivot( &df, - ["index"], ["values1"], + ["index"], Some(["values2"]), true, Some(PivotAgg::Count), @@ -33,8 +33,8 @@ fn test_pivot_date_() -> PolarsResult<()> { // Test with date as the `values` input. 
let mut out = pivot_stable( &df, - ["index"], ["values2"], + ["index"], Some(["values1"]), true, Some(PivotAgg::First), @@ -63,8 +63,8 @@ fn test_pivot_old() { let pvt = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Sum), @@ -78,8 +78,8 @@ fn test_pivot_old() { ); let pvt = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Min), @@ -92,8 +92,8 @@ fn test_pivot_old() { ); let pvt = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Max), @@ -106,8 +106,8 @@ fn test_pivot_old() { ); let pvt = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Mean), @@ -148,8 +148,8 @@ fn test_pivot_categorical() -> PolarsResult<()> { let out = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), true, Some(PivotAgg::Count), @@ -173,8 +173,8 @@ fn test_pivot_new() -> PolarsResult<()> { let out = (pivot_stable( &df, - ["index1", "index2"], ["cols1"], + ["index1", "index2"], Some(["values1"]), true, Some(PivotAgg::Sum), @@ -190,8 +190,8 @@ fn test_pivot_new() -> PolarsResult<()> { let out = pivot_stable( &df, - ["index1", "index2"], ["cols1", "cols2"], + ["index1", "index2"], Some(["values1"]), true, Some(PivotAgg::Sum), @@ -221,8 +221,8 @@ fn test_pivot_2() -> PolarsResult<()> { let out = pivot_stable( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::First), @@ -254,8 +254,8 @@ fn test_pivot_datetime() -> PolarsResult<()> { let out = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Sum), From 8766aa3f3b2f672090a8c86c344172aab90681a8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:19:31 +0100 Subject: [PATCH 12/16] more rust alignment --- crates/polars-core/src/frame/explode.rs | 8 ++++---- crates/polars-lazy/src/tests/queries.rs | 2 +- crates/polars-ops/src/frame/pivot/mod.rs | 8 ++++---- py-polars/src/dataframe/general.rs | 2 +- py-polars/src/lazyframe/mod.rs | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index c7ed14eca344..a405067d834c 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -24,8 +24,8 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))] pub struct UnpivotArgs { - pub index: Vec, pub on: Vec, + pub index: Vec, pub variable_name: Option, pub value_name: Option, /// Whether the unpivot may be done @@ -247,8 +247,8 @@ impl DataFrame { let index = index.into_vec(); let on = on.into_vec(); self.unpivot2(UnpivotArgs { - index, on, + index, ..Default::default() }) } @@ -448,8 +448,8 @@ mod test { ); let args = UnpivotArgs { - index: vec![], on: vec![], + index: vec![], ..Default::default() }; @@ -464,8 +464,8 @@ mod test { ); let args = UnpivotArgs { - index: vec!["A".into()], on: vec![], + index: vec!["A".into()], ..Default::default() }; diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index 690dc6f57ea9..ade6df69c57e 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -50,8 +50,8 @@ fn test_lazy_unpivot() { let df = get_df(); let args = UnpivotArgs { - index: 
vec!["petal_width".into(), "petal_length".into()], on: vec!["sepal_length".into(), "sepal_width".into()], + index: vec!["petal_width".into(), "petal_length".into()], ..Default::default() }; diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index 194f3a2c5d69..11a2d0b64248 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -110,8 +110,8 @@ where let values = get_values_columns(pivot_df, &index, &on, values); pivot_impl( pivot_df, - &index, &on, + &index, &values, agg_fn, sort_columns, @@ -153,8 +153,8 @@ where let values = get_values_columns(pivot_df, &index, &on, values); pivot_impl( pivot_df, - &index, &on, + &index, &values, agg_fn, sort_columns, @@ -192,11 +192,11 @@ where fn pivot_impl( pivot_df: &DataFrame, // keys of the first group_by operation + on: &[String], + // these columns will be aggregated in the nested group_by index: &[String], // these columns will be used for a nested group_by // the rows of this nested group_by will be pivoted as header column values - on: &[String], - // these columns will be aggregated in the nested group_by values: &[String], // aggregation function agg_fn: Option, diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index 421afe0c5995..40978fe7427c 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -405,8 +405,8 @@ impl PyDataFrame { variable_name: Option<&str>, ) -> PyResult { let args = UnpivotArgs { - index: strings_to_smartstrings(index), on: strings_to_smartstrings(on), + index: strings_to_smartstrings(index), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable: false, diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 277a2c1980a7..ce18a4ea2f35 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -1121,8 +1121,8 @@ impl PyLazyFrame { streamable: bool, ) -> Self { let args = UnpivotArgs { - index: strings_to_smartstrings(index), on: strings_to_smartstrings(on), + index: strings_to_smartstrings(index), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable, From d6bfd565378179f3d812460d7a2f4a09746f46d1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:34:06 +0100 Subject: [PATCH 13/16] actually fixup --- crates/polars-core/src/frame/explode.rs | 2 +- py-polars/src/dataframe/general.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index a405067d834c..77edc1c94f89 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -239,7 +239,7 @@ impl DataFrame { /// | "a" | 5 | "D" | 6 | /// +-----+-----+----------+-------+ /// ``` - pub fn unpivot(&self, index: I, on: J) -> PolarsResult + pub fn unpivot(&self, on: I, index: J) -> PolarsResult where I: IntoVec, J: IntoVec, diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index 40978fe7427c..b01a0166661f 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -432,8 +432,8 @@ impl PyDataFrame { let agg_expr = aggregate_expr.map(|expr| expr.inner); let df = fun( &self.df, - index, on, + index, values, sort_columns, agg_expr, From 2df04e04ecdf42386836b373b317326879ea512f Mon Sep 17 00:00:00 2001 
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:38:34 +0100 Subject: [PATCH 14/16] more reordering and fixup rust test --- crates/polars/tests/it/core/pivot.rs | 2 +- docs/src/python/user-guide/transformations/pivot.py | 2 +- py-polars/polars/dataframe/frame.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/polars/tests/it/core/pivot.rs b/crates/polars/tests/it/core/pivot.rs index fc0e8481aa8f..144c3291a54e 100644 --- a/crates/polars/tests/it/core/pivot.rs +++ b/crates/polars/tests/it/core/pivot.rs @@ -120,8 +120,8 @@ fn test_pivot_old() { ); let pvt = pivot( &df, - ["index"], ["columns"], + ["index"], Some(["values"]), false, Some(PivotAgg::Count), diff --git a/docs/src/python/user-guide/transformations/pivot.py b/docs/src/python/user-guide/transformations/pivot.py index a51db55bbdf4..95354072d24e 100644 --- a/docs/src/python/user-guide/transformations/pivot.py +++ b/docs/src/python/user-guide/transformations/pivot.py @@ -15,7 +15,7 @@ # --8<-- [end:df] # --8<-- [start:eager] -out = df.pivot(index="foo", on="bar", values="N", aggregate_function="first") +out = df.pivot("bar", index="foo", values="N", aggregate_function="first") print(out) # --8<-- [end:eager] diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 9cc63ceb9590..357731cdf952 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7609,10 +7609,10 @@ def pivot( on Name of the column(s) whose values will be used as the header of the output DataFrame. - values - Column values to aggregate. If None, all remaining columns will be used. index One or multiple keys to group by. + values + Column values to aggregate. If None, all remaining columns will be used. aggregate_function Choose from: @@ -7644,7 +7644,7 @@ def pivot( ... "baz": [1, 2, 3, 4, 5, 6], ... } ... ) - >>> df.pivot(index="foo", on="bar", values="baz", aggregate_function="sum") + >>> df.pivot("bar", index="foo", values="baz", aggregate_function="sum") shape: (2, 3) ┌─────┬─────┬─────┐ │ foo ┆ y ┆ x │ From a8c536381b674a733ef8d4694d3408bdddb2d524 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:44:37 +0100 Subject: [PATCH 15/16] use positional in examples --- docs/src/python/user-guide/transformations/unpivot.py | 2 +- py-polars/polars/dataframe/frame.py | 2 +- py-polars/polars/lazyframe/frame.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/python/user-guide/transformations/unpivot.py b/docs/src/python/user-guide/transformations/unpivot.py index 03efe884baa0..3e79279eec17 100644 --- a/docs/src/python/user-guide/transformations/unpivot.py +++ b/docs/src/python/user-guide/transformations/unpivot.py @@ -13,6 +13,6 @@ # --8<-- [end:df] # --8<-- [start:unpivot] -out = df.unpivot(index=["A", "B"], on=["C", "D"]) +out = df.unpivot(["C", "D"], index=["A", "B"]) print(out) # --8<-- [end:unpivot] diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 357731cdf952..4c4f5f0ae885 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7849,7 +7849,7 @@ def unpivot( ... } ... 
) >>> import polars.selectors as cs - >>> df.unpivot(index="a", on=cs.numeric()) + >>> df.unpivot(cs.numeric(), index="a") shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index fd7d0fb6e4be..5ff9304d584e 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5754,7 +5754,7 @@ def unpivot( ... } ... ) >>> import polars.selectors as cs - >>> lf.unpivot(index="a", on=cs.numeric()).collect() + >>> lf.unpivot(cs.numeric(), index="a").collect() shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ From a633ecae1af3756f011a84ffb751b80f7852dd3f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:52:53 +0100 Subject: [PATCH 16/16] `on` positional in LazyFrame.unpivot --- py-polars/polars/lazyframe/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 5ff9304d584e..51aafc3c4b15 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5705,8 +5705,8 @@ def drop_nulls( def unpivot( self, - *, on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + *, index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None,
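[Editor's note, not part of the patch series: a minimal sketch of how the reworked API reads once these commits land, based only on the signatures and docstring examples shown in the diffs above — `on` becomes the leading positional argument of both `pivot` and `unpivot`, while `index`, `values` and the remaining options are keyword-only. The frame and column names (`foo`, `bar`, `baz`) are taken from the docstring examples and are illustrative only.]

>>> import polars as pl
>>> import polars.selectors as cs
>>> df = pl.DataFrame(
...     {
...         "foo": ["one", "one", "two", "two", "one", "two"],
...         "bar": ["y", "y", "y", "x", "x", "x"],
...         "baz": [1, 2, 3, 4, 5, 6],
...     }
... )
>>> # wide: one output column per unique value of `bar`, summing `baz` within each `foo` group
>>> df.pivot("bar", index="foo", values="baz", aggregate_function="sum")
>>> # long: the selected numeric column(s) are stacked into `variable`/`value`, keeping `foo` as the identifier
>>> df.unpivot(cs.numeric(), index="foo")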