Skip to content

Commit

Permalink
enable randomize again
Browse files: browse the repository at this point in the history
  • Loading branch information
gschoeni committed Dec 5, 2024
1 parent 9d4883b commit fc41f5e
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/lib/src/core/df/tabular.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,22 @@ pub fn transform_lazy(mut df: LazyFrame, opts: DFOpts) -> Result<LazyFrame, Oxen
}
}

if opts.should_randomize {
log::debug!("transform_lazy randomizing df");
let full_df = df.collect()
.map_err(|e| OxenError::basic_str(format!("{e:?}")))?;
let n = Series::new("".into(), &[full_df.height() as i64]);

df = full_df.sample_n(
&n, // n == full_df.height(): sample every row, so the result is a full shuffle
false, // without replacement
true, // shuffle
None, // seed
)
.map_err(|e| OxenError::basic_str(format!("Failed to randomize dataframe: {e:?}")))?
.lazy();
}

if let Some(columns) = opts.unique_columns() {
df = unique_df(df, columns)?;
}
Expand Down

0 comments on commit fc41f5e

Please sign in to comment.