Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python,rust!): Rename DataFrame.melt to unpivot and make parameters consistent with pivot #17095

Merged
merged 16 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 51 additions & 52 deletions crates/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer<i64>)> {
}
}

/// Arguments for `[DataFrame::melt]` function
/// Arguments for the [`DataFrame::unpivot`] function.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
pub struct MeltArgs {
pub id_vars: Vec<SmartString>,
pub value_vars: Vec<SmartString>,
pub struct UnpivotArgs {
pub on: Vec<SmartString>,
pub index: Vec<SmartString>,
pub variable_name: Option<SmartString>,
pub value_name: Option<SmartString>,
/// Whether the melt may be done
/// Whether the unpivot may be done
/// in the streaming engine.
/// This will not have a stable ordering.
pub streamable: bool,
Expand Down Expand Up @@ -189,10 +189,10 @@ impl DataFrame {
///
/// # Arguments
///
/// * `id_vars` - String slice that represent the columns to use as id variables.
/// * `value_vars` - String slice that represent the columns to use as value variables.
/// * `on` - String slice that represents the columns to use as value variables.
/// * `index` - String slice that represents the columns to use as id variables.
///
/// If `value_vars` is empty all columns that are not in `id_vars` will be used.
/// If `on` is empty all columns that are not in `index` will be used.
///
/// ```ignore
/// # use polars_core::prelude::*;
Expand All @@ -202,9 +202,9 @@ impl DataFrame {
/// "D" => &[2, 4, 6]
/// )?;
///
/// let melted = df.melt(&["A", "B"], &["C", "D"])?;
/// let unpivoted = df.unpivot(&["C", "D"], &["A", "B"])?;
/// println!("{:?}", df);
/// println!("{:?}", melted);
/// println!("{:?}", unpivoted);
/// # Ok::<(), PolarsError>(())
/// ```
/// Outputs:
Expand Down Expand Up @@ -239,51 +239,51 @@ impl DataFrame {
/// | "a" | 5 | "D" | 6 |
/// +-----+-----+----------+-------+
/// ```
pub fn melt<I, J>(&self, id_vars: I, value_vars: J) -> PolarsResult<Self>
pub fn unpivot<I, J>(&self, on: I, index: J) -> PolarsResult<Self>
where
I: IntoVec<SmartString>,
J: IntoVec<SmartString>,
{
let id_vars = id_vars.into_vec();
let value_vars = value_vars.into_vec();
self.melt2(MeltArgs {
id_vars,
value_vars,
let index = index.into_vec();
let on = on.into_vec();
self.unpivot2(UnpivotArgs {
on,
index,
..Default::default()
})
}

/// Similar to melt, but without generics. This may be easier if you want to pass
/// an empty `id_vars` or empty `value_vars`.
pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
let id_vars = args.id_vars;
let mut value_vars = args.value_vars;
/// Similar to unpivot, but without generics. This may be easier if you want to pass
/// an empty `index` or empty `on`.
pub fn unpivot2(&self, args: UnpivotArgs) -> PolarsResult<Self> {
let index = args.index;
let mut on = args.on;

let variable_name = args.variable_name.as_deref().unwrap_or("variable");
let value_name = args.value_name.as_deref().unwrap_or("value");

let len = self.height();

// if `on` is empty we take all columns that are not in `index`.
if value_vars.is_empty() {
if on.is_empty() {
// return empty frame if there are no columns available to use as value vars
if id_vars.len() == self.width() {
if index.len() == self.width() {
let variable_col = Series::new_empty(variable_name, &DataType::String);
let value_col = Series::new_empty(variable_name, &DataType::Null);

let mut out = self.select(id_vars).unwrap().clear().columns;
let mut out = self.select(index).unwrap().clear().columns;
out.push(variable_col);
out.push(value_col);

return Ok(unsafe { DataFrame::new_no_checks(out) });
}

let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str()));
value_vars = self
let index_set = PlHashSet::from_iter(index.iter().map(|s| s.as_str()));
on = self
.get_columns()
.iter()
.filter_map(|s| {
if id_vars_set.contains(s.name()) {
if index_set.contains(s.name()) {
None
} else {
Some(s.name().into())
Expand All @@ -294,7 +294,7 @@ impl DataFrame {

// values will all be placed in single column, so we must find their supertype
let schema = self.schema();
let mut iter = value_vars.iter().map(|v| {
let mut iter = on.iter().map(|v| {
schema
.get(v)
.ok_or_else(|| polars_err!(ColumnNotFound: "{}", v))
Expand All @@ -304,31 +304,30 @@ impl DataFrame {
st = try_get_supertype(&st, dt?)?;
}

// The column name of the variable that is melted
let mut variable_col =
MutableBinaryViewArray::<str>::with_capacity(len * value_vars.len() + 1);
// The column name of the variable that is unpivoted
let mut variable_col = MutableBinaryViewArray::<str>::with_capacity(len * on.len() + 1);
// prepare ids
let ids_ = self.select_with_schema_unchecked(id_vars, &schema)?;
let ids_ = self.select_with_schema_unchecked(index, &schema)?;
let mut ids = ids_.clone();
if ids.width() > 0 {
for _ in 0..value_vars.len() - 1 {
for _ in 0..on.len() - 1 {
ids.vstack_mut_unchecked(&ids_)
}
}
ids.as_single_chunk_par();
drop(ids_);

let mut values = Vec::with_capacity(value_vars.len());
let mut values = Vec::with_capacity(on.len());

for value_column_name in &value_vars {
for value_column_name in &on {
variable_col.extend_constant(len, Some(value_column_name.as_str()));
// ensure we go via the schema so we are O(1)
// self.column() is linear
// together with this loop that would make it O^2 over value_vars
// together with this loop that would make it O(n^2) over `on`
let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?;
let col = &self.columns[pos];
let value_col = col.cast(&st).map_err(
|_| polars_err!(InvalidOperation: "'melt/unpivot' not supported for dtype: {}", col.dtype()),
|_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()),
)?;
values.extend_from_slice(value_col.chunks())
}
Expand Down Expand Up @@ -434,28 +433,28 @@ mod test {

#[test]
#[cfg_attr(miri, ignore)]
fn test_melt() -> PolarsResult<()> {
fn test_unpivot() -> PolarsResult<()> {
let df = df!("A" => &["a", "b", "a"],
"B" => &[1, 3, 5],
"C" => &[10, 11, 12],
"D" => &[2, 4, 6]
)
.unwrap();

let melted = df.melt(["A", "B"], ["C", "D"])?;
let unpivoted = df.unpivot(["C", "D"], ["A", "B"])?;
assert_eq!(
Vec::from(melted.column("value")?.i32()?),
Vec::from(unpivoted.column("value")?.i32()?),
&[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)]
);

let args = MeltArgs {
id_vars: vec![],
value_vars: vec![],
let args = UnpivotArgs {
on: vec![],
index: vec![],
..Default::default()
};

let melted = df.melt2(args).unwrap();
let value = melted.column("value")?;
let unpivoted = df.unpivot2(args).unwrap();
let value = unpivoted.column("value")?;
// String because of supertype
let value = value.str()?;
let value = value.into_no_null_iter().collect::<Vec<_>>();
Expand All @@ -464,22 +463,22 @@ mod test {
&["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"]
);

let args = MeltArgs {
id_vars: vec!["A".into()],
value_vars: vec![],
let args = UnpivotArgs {
on: vec![],
index: vec!["A".into()],
..Default::default()
};

let melted = df.melt2(args).unwrap();
let value = melted.column("value")?;
let unpivoted = df.unpivot2(args).unwrap();
let value = unpivoted.column("value")?;
let value = value.i32()?;
let value = value.into_no_null_iter().collect::<Vec<_>>();
assert_eq!(value, &[1, 3, 5, 10, 11, 12, 2, 4, 6]);
let variable = melted.column("variable")?;
let variable = unpivoted.column("variable")?;
let variable = variable.str()?;
let variable = variable.into_no_null_iter().collect::<Vec<_>>();
assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]);
assert!(melted.column("A").is_ok());
assert!(unpivoted.column("A").is_ok());
Ok(())
}
}
2 changes: 1 addition & 1 deletion crates/polars-core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub use crate::datatypes::{ArrayCollectIterExt, *};
pub use crate::error::{
polars_bail, polars_ensure, polars_err, polars_warn, PolarsError, PolarsResult,
};
pub use crate::frame::explode::MeltArgs;
pub use crate::frame::explode::UnpivotArgs;
#[cfg(feature = "algorithm_group_by")]
pub(crate) use crate::frame::group_by::aggregations::*;
#[cfg(feature = "algorithm_group_by")]
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1600,12 +1600,12 @@ impl LazyFrame {
self.slice(neg_tail, n)
}

/// Melt the DataFrame from wide to long format.
/// Unpivot the DataFrame from wide to long format.
///
/// See [`MeltArgs`] for information on how to melt a DataFrame.
pub fn melt(self, args: MeltArgs) -> LazyFrame {
/// See [`UnpivotArgs`] for information on how to unpivot a DataFrame.
pub fn unpivot(self, args: UnpivotArgs) -> LazyFrame {
let opt_state = self.get_opt_state();
let lp = self.get_plan_builder().melt(args).build();
let lp = self.get_plan_builder().unpivot(args).build();
Self::from_logical_plan(lp, opt_state)
}

Expand Down
28 changes: 6 additions & 22 deletions crates/polars-lazy/src/frame/pivot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ impl PhysicalAggExpr for PivotExpr {

pub fn pivot<I0, I1, I2, S0, S1, S2>(
df: &DataFrame,
index: I0,
columns: I1,
on: I0,
index: I1,
values: Option<I2>,
sort_columns: bool,
agg_expr: Option<Expr>,
Expand All @@ -53,21 +53,13 @@ where
let expr = prepare_eval_expr(agg_expr);
PivotAgg::Expr(Arc::new(PivotExpr(expr)))
});
polars_ops::pivot::pivot(
df,
index,
columns,
values,
sort_columns,
agg_expr,
separator,
)
polars_ops::pivot::pivot(df, on, index, values, sort_columns, agg_expr, separator)
}

pub fn pivot_stable<I0, I1, I2, S0, S1, S2>(
df: &DataFrame,
index: I0,
columns: I1,
on: I0,
index: I1,
values: Option<I2>,
sort_columns: bool,
agg_expr: Option<Expr>,
Expand All @@ -87,13 +79,5 @@ where
let expr = prepare_eval_expr(agg_expr);
PivotAgg::Expr(Arc::new(PivotExpr(expr)))
});
polars_ops::pivot::pivot_stable(
df,
index,
columns,
values,
sort_columns,
agg_expr,
separator,
)
polars_ops::pivot::pivot_stable(df, on, index, values, sort_columns, agg_expr, separator)
}
10 changes: 5 additions & 5 deletions crates/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,18 @@ fn test_lazy_alias() {
}

#[test]
fn test_lazy_melt() {
fn test_lazy_unpivot() {
let df = get_df();

let args = MeltArgs {
id_vars: vec!["petal_width".into(), "petal_length".into()],
value_vars: vec!["sepal_length".into(), "sepal_width".into()],
let args = UnpivotArgs {
on: vec!["sepal_length".into(), "sepal_width".into()],
index: vec!["petal_width".into(), "petal_length".into()],
..Default::default()
};

let out = df
.lazy()
.melt(args)
.unpivot(args)
.filter(col("variable").eq(lit("sepal_length")))
.select([col("variable"), col("petal_width"), col("value")])
.collect()
Expand Down
Loading
Loading