df: always produce the same order in output table

Change the `filter_mount_list()` function so that it always produces the same order of `MountInfo` objects. This change ultimately results in `df` printing its table of filesystems in the same order on each execution. Previously, the table was in an arbitrary order because the `MountInfo` objects were read from a `HashMap`. Fixes uutils#3086.
jfinkels · Feb 13, 2022 · 20ce4aa · 20ce4aa
1 parent 042e537
commit 20ce4aa
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 28 deletions.
diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs
@@ -14,8 +14,6 @@ use uucore::fsext::{read_fs_list, FsUsage, MountInfo};
 
 use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
 
-use std::cell::Cell;
-use std::collections::HashMap;
 use std::collections::HashSet;
 #[cfg(unix)]
 use std::ffi::CString;
@@ -218,6 +216,19 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
     true
 }
 
+/// Whether to prioritize given mount info over all others on the same device.
+///
+/// This function decides whether the mount info `mi` is better than
+/// all others in `previous` that mount the same device as `mi`.
+fn is_best(previous: &[MountInfo], mi: &MountInfo) -> bool {
+    for seen in previous {
+        if seen.dev_id == mi.dev_id && mount_info_lt(mi, seen) {
+            return false;
+        }
+    }
+    true
+}
+
 /// Keep only the specified subset of [`MountInfo`] instances.
 ///
 /// If `paths` is non-empty, this function excludes any [`MountInfo`]
@@ -229,35 +240,18 @@ fn mount_info_lt(m1: &MountInfo, m2: &MountInfo) -> bool {
 /// Finally, if there are duplicate entries, the one with the shorter
 /// path is kept.
 fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Vec<MountInfo> {
-    let mut mount_info_by_id = HashMap::<String, Cell<MountInfo>>::new();
+    let mut result = vec![];
     for mi in vmi {
-        if !is_included(&mi, paths, opt) {
-            continue;
-        }
-
-        // If the device ID has not been encountered yet, just store it.
-        let id = mi.dev_id.clone();
-        #[allow(clippy::map_entry)]
-        if !mount_info_by_id.contains_key(&id) {
-            mount_info_by_id.insert(id, Cell::new(mi));
-            continue;
-        }
-
-        // Otherwise, if we have seen the current device ID before,
-        // then check if we need to update it or keep the previously
-        // seen one.
-        let seen = mount_info_by_id[&id].replace(mi.clone());
-        if mount_info_lt(&mi, &seen) {
-            mount_info_by_id[&id].replace(seen);
+        // TODO The running time of the `is_best()` function is linear
+        // in the length of `result`. That makes the running time of
+        // this loop quadratic in the length of `vmi`. This could be
+        // improved by a more efficient implementation of `is_best()`,
+        // but `vmi` is probably not very long in practice.
+        if is_included(&mi, paths, opt) && is_best(&result, &mi) {
+            result.push(mi);
         }
     }
-
-    // Take ownership of the `MountInfo` instances and collect them
-    // into a `Vec`.
-    mount_info_by_id
-        .into_values()
-        .map(|m| m.into_inner())
-        .collect()
+    result
 }
 
 #[uucore::main]

diff --git a/tests/by-util/test_df.rs b/tests/by-util/test_df.rs
@@ -37,4 +37,25 @@ fn test_df_output() {
     }
 }
 
+/// Test that the order of rows in the table does not change across executions.
+#[test]
+fn test_order_same() {
+    // TODO When #3057 is resolved, we should just use
+    //
+    //     new_ucmd!().args(&["--output=source"]).succeeds().stdout_move_str();
+    //
+    // instead of parsing the entire `df` table as a string.
+    let output1 = new_ucmd!().succeeds().stdout_move_str();
+    let output2 = new_ucmd!().succeeds().stdout_move_str();
+    let output1: Vec<String> = output1
+        .lines()
+        .map(|l| String::from(l.split_once(' ').unwrap().0))
+        .collect();
+    let output2: Vec<String> = output2
+        .lines()
+        .map(|l| String::from(l.split_once(' ').unwrap().0))
+        .collect();
+    assert_eq!(output1, output2);
+}
+
 // ToDO: more tests...