Skip to content

Commit

Permalink
df: always produce the same order in output table
Browse files Browse the repository at this point in the history
Change the `filter_mount_list()` function so that it always produces
the same order of `MountInfo` objects. This change ultimately results
in `df` printing its table of filesystems in the same order on each
execution. Previously, the table was in an arbitrary order because the
`MountInfo` objects were read from a `HashMap`.

Fixes uutils#3086.
  • Loading branch information
jfinkels committed Feb 13, 2022
1 parent 042e537 commit 20ce4aa
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 28 deletions.
50 changes: 22 additions & 28 deletions src/uu/df/src/df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ use uucore::fsext::{read_fs_list, FsUsage, MountInfo};

use clap::{crate_version, App, AppSettings, Arg, ArgMatches};

use std::cell::Cell;
use std::collections::HashMap;
use std::collections::HashSet;
#[cfg(unix)]
use std::ffi::CString;
Expand Down Expand Up @@ -218,6 +216,19 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
true
}

/// Whether to prioritize given mount info over all others on the same device.
///
/// This function decides whether the mount info `mi` is better than
/// all others in `previous` that mount the same device as `mi`.
fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool {
for seen in previous {
if seen.dev_id == mi.dev_id && mount_info_lt(mi, seen) {
return false;
}
}
true
}

/// Keep only the specified subset of [`MountInfo`] instances.
///
/// If `paths` is non-empty, this function excludes any [`MountInfo`]
Expand All @@ -229,35 +240,18 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
/// Finally, if there are duplicate entries, the one with the shorter
/// path is kept.
fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
let mut mount_info_by_id = HashMap::<String, Cell<MountInfo>>::new();
let mut result = vec![];
for mi in vmi {
if !is_included(&mi, paths, opt) {
continue;
}

// If the device ID has not been encountered yet, just store it.
let id = mi.dev_id.clone();
#[allow(clippy::map_entry)]
if !mount_info_by_id.contains_key(&id) {
mount_info_by_id.insert(id, Cell::new(mi));
continue;
}

// Otherwise, if we have seen the current device ID before,
// then check if we need to update it or keep the previously
// seen one.
let seen = mount_info_by_id[&id].replace(mi.clone());
if mount_info_lt(&mi, &seen) {
mount_info_by_id[&id].replace(seen);
// TODO The running time of the `is_best()` function is linear
// in the length of `result`. That makes the running time of
// this loop quadratic in the length of `vmi`. This could be
// improved by a more efficient implementation of `is_best()`,
// but `vmi` is probably not very long in practice.
if is_included(&mi, paths, opt) && is_best(&result, &mi) {
result.push(mi);
}
}

// Take ownership of the `MountInfo` instances and collect them
// into a `Vec`.
mount_info_by_id
.into_values()
.map(|m| m.into_inner())
.collect()
result
}

#[uucore::main]
Expand Down
21 changes: 21 additions & 0 deletions tests/by-util/test_df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,25 @@ fn test_df_output() {
}
}

/// Test that the order of rows in the table does not change across executions.
#[test]
fn test_order_same() {
// TODO When #3057 is resolved, we should just use
//
// new_ucmd!().args(&["--output=source"]).succeeds().stdout_move_str();
//
// instead of parsing the entire `df` table as a string.
let output1 = new_ucmd!().succeeds().stdout_move_str();
let output2 = new_ucmd!().succeeds().stdout_move_str();
let output1: Vec<String> = output1
.lines()
.map(|l| String::from(l.split_once(' ').unwrap().0))
.collect();
let output2: Vec<String> = output2
.lines()
.map(|l| String::from(l.split_once(' ').unwrap().0))
.collect();
assert_eq!(output1, output2);
}

// ToDO: more tests...

0 comments on commit 20ce4aa

Please sign in to comment.