Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: reimplement optimized natural sorting algorithm, speed up ~6 times for case-insensitive sorting #237

Merged
merged 1 commit into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion config/preset/yazi.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[manager]
layout = [ 1, 4, 3 ]
sort_by = "modified"
sort_sensitive = false
sort_sensitive = true
sort_reverse = true
sort_dir_first = true
show_hidden = false
Expand Down
1 change: 0 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ crossterm = "^0"
futures = "^0"
indexmap = "^2"
libc = "^0"
natord = "^1"
notify = { version = "^6", default-features = false, features = [ "macos_fsevent" ] }
parking_lot = "^0"
ratatui = "^0"
Expand Down
13 changes: 4 additions & 9 deletions core/src/files/sorter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{cmp::Ordering, collections::BTreeMap, mem};

use config::{manager::SortBy, MANAGER};
use shared::Url;
use shared::{natsort, Url};

use super::File;

Expand Down Expand Up @@ -37,8 +37,8 @@ impl FilesSorter {
}

self.cmp(
a.url.as_os_str().to_ascii_lowercase(),
b.url.as_os_str().to_ascii_lowercase(),
a.url.as_os_str().to_ascii_uppercase(),
b.url.as_os_str().to_ascii_uppercase(),
self.promote(a, b),
)
}),
Expand Down Expand Up @@ -78,12 +78,7 @@ impl FilesSorter {
return promote;
}

let ordering = if self.sensitive {
natord::compare(&entities[a].0, &entities[b].0)
} else {
natord::compare_ignore_case(&entities[a].0, &entities[b].0)
};

let ordering = natsort(&entities[a].0, &entities[b].0, !self.sensitive);
if self.reverse { ordering.reverse() } else { ordering }
});

Expand Down
2 changes: 1 addition & 1 deletion cspell.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":"0.2","words":["Punct","KEYMAP","splitn","crossterm","YAZI","unar","peekable","ratatui","syntect","pbpaste","pbcopy","ffmpegthumbnailer","oneshot","Posix","Lsar","XADDOS","zoxide","cands","Deque","precache","imageops","IFBLK","IFCHR","IFDIR","IFIFO","IFLNK","IFMT","IFSOCK","IRGRP","IROTH","IRUSR","ISGID","ISUID","ISVTX","IWGRP","IWOTH","IWUSR","IXGRP","IXOTH","IXUSR","libc","winsize","TIOCGWINSZ","xpixel","ypixel","ioerr","appender","Catppuccin","macchiato","gitmodules","Dotfiles","bashprofile","vimrc","flac","webp","exiftool","mediainfo","ripgrep","nvim","indexmap","indexmap","unwatch","canonicalize","serde","fsevent","Ueberzug","iterm","wezterm","sixel","chafa","ueberzugpp","️ Überzug","️ Überzug","Konsole","Alacritty","Überzug","pkgs","paru","unarchiver","pdftoppm","poppler","prebuild","singlefile","jpegopt","EXIF","rustfmt","mktemp","nanos","xclip","xsel","natord","Mintty","nixos","nixpkgs","SIGTSTP","SIGCONT","SIGCONT","backstack"],"flagWords":[],"language":"en"}
{"version":"0.2","flagWords":[],"words":["Punct","KEYMAP","splitn","crossterm","YAZI","unar","peekable","ratatui","syntect","pbpaste","pbcopy","ffmpegthumbnailer","oneshot","Posix","Lsar","XADDOS","zoxide","cands","Deque","precache","imageops","IFBLK","IFCHR","IFDIR","IFIFO","IFLNK","IFMT","IFSOCK","IRGRP","IROTH","IRUSR","ISGID","ISUID","ISVTX","IWGRP","IWOTH","IWUSR","IXGRP","IXOTH","IXUSR","libc","winsize","TIOCGWINSZ","xpixel","ypixel","ioerr","appender","Catppuccin","macchiato","gitmodules","Dotfiles","bashprofile","vimrc","flac","webp","exiftool","mediainfo","ripgrep","nvim","indexmap","indexmap","unwatch","canonicalize","serde","fsevent","Ueberzug","iterm","wezterm","sixel","chafa","ueberzugpp","️ Überzug","️ Überzug","Konsole","Alacritty","Überzug","pkgs","paru","unarchiver","pdftoppm","poppler","prebuild","singlefile","jpegopt","EXIF","rustfmt","mktemp","nanos","xclip","xsel","Mintty","nixos","nixpkgs","SIGTSTP","SIGCONT","SIGCONT","backstack","natsort","natsort"],"language":"en"}
2 changes: 2 additions & 0 deletions shared/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod errors;
mod fns;
mod fs;
mod mime;
mod natsort;
mod ro_cell;
mod term;
mod throttle;
Expand All @@ -20,6 +21,7 @@ pub use errors::*;
pub use fns::*;
pub use fs::*;
pub use mime::*;
pub use natsort::*;
pub use ro_cell::*;
pub use term::*;
pub use throttle::*;
Expand Down
276 changes: 276 additions & 0 deletions shared/src/natsort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
// A natural sort implementation in Rust.
// Copyright (c) 2023, sxyazi.
//
// This is a port of the C version of Martin Pool's `strnatcmp.c`:
// http://sourcefrog.net/projects/natsort/

use std::cmp::Ordering;

macro_rules! return_unless_equal {
($ord:expr) => {
match $ord {
Ordering::Equal => {}
ord => return ord,
}
};
}

#[inline(always)]
fn compare_left(left: &[u8], right: &[u8], li: &mut usize, ri: &mut usize) -> Ordering {
let mut l;
let mut r;

loop {
l = left.get(*li);
r = right.get(*ri);

match (l.is_some_and(|b| b.is_ascii_digit()), r.is_some_and(|b| b.is_ascii_digit())) {
(true, true) => {
return_unless_equal!(unsafe { l.unwrap_unchecked().cmp(r.unwrap_unchecked()) })
}
(true, false) => return Ordering::Greater,
(false, true) => return Ordering::Less,
(false, false) => return Ordering::Equal,
}

*li += 1;
*ri += 1;
}
}

#[inline(always)]
fn compare_right(left: &[u8], right: &[u8], li: &mut usize, ri: &mut usize) -> Ordering {
let mut l;
let mut r;
let mut bias = Ordering::Equal;

loop {
l = left.get(*li);
r = right.get(*ri);

match (l.is_some_and(|b| b.is_ascii_digit()), r.is_some_and(|b| b.is_ascii_digit())) {
(true, true) => {
if bias == Ordering::Equal {
bias = unsafe { l.unwrap_unchecked().cmp(r.unwrap_unchecked()) };
}
}
(true, false) => return Ordering::Greater,
(false, true) => return Ordering::Less,
(false, false) => return bias,
}

*li += 1;
*ri += 1;
}
}

pub fn natsort(left: &str, right: &str, insensitive: bool) -> Ordering {
let left = left.as_bytes();
let right = right.as_bytes();

let mut li = 0;
let mut ri = 0;

let mut l = left.get(li);
let mut r = right.get(ri);

macro_rules! left_next {
() => {{
li += 1;
l = left.get(li);
}};
}

macro_rules! right_next {
() => {{
ri += 1;
r = right.get(ri);
}};
}

loop {
while l.is_some_and(|c| c.is_ascii_whitespace()) {
left_next!();
}
while r.is_some_and(|c| c.is_ascii_whitespace()) {
right_next!();
}

match (l, r) {
(Some(&ll), Some(&rr)) => {
if ll.is_ascii_digit() && rr.is_ascii_digit() {
if ll == b'0' || rr == b'0' {
return_unless_equal!(compare_left(left, right, &mut li, &mut ri));
} else {
return_unless_equal!(compare_right(left, right, &mut li, &mut ri));
}

l = left.get(li);
r = right.get(ri);
continue;
}

if insensitive {
return_unless_equal!(ll.to_ascii_lowercase().cmp(&rr.to_ascii_lowercase()));
} else {
return_unless_equal!(ll.cmp(&rr));
}
}
(Some(_), None) => return Ordering::Greater,
(None, Some(_)) => return Ordering::Less,
(None, None) => return Ordering::Equal,
}

left_next!();
right_next!();
}
}

#[cfg(test)]
mod tests {
use super::*;

fn cmp(left: &[&str]) {
let mut right = left.to_vec();
right.sort_by(|a, b| natsort(a, b, true));
assert_eq!(left, right);
}

#[test]
fn test_natsort() {
let dates = vec!["1999-3-3", "1999-12-25", "2000-1-2", "2000-1-10", "2000-3-23"];
let fractions = vec![
"1.002.01", "1.002.03", "1.002.08", "1.009.02", "1.009.10", "1.009.20", "1.010.12",
"1.011.02",
];
let words = vec![
"1-02",
"1-2",
"1-20",
"10-20",
"fred",
"jane",
"pic01",
"pic02",
"pic02a",
"pic02000",
"pic05",
"pic2",
"pic3",
"pic4",
"pic 4 else",
"pic 5",
"pic 5 ",
"pic 5 something",
"pic 6",
"pic 7",
"pic100",
"pic100a",
"pic120",
"pic121",
"tom",
"x2-g8",
"x2-y08",
"x2-y7",
"x8-y8",
];

cmp(&dates);
cmp(&fractions);
cmp(&words);
}

// #[test]
// fn test_bench() {
// use std::time::Instant;
//
// let files = vec![
// "pexels-asad-photo-maldives-1024967.jpg",
// "154586 (540p).mp4",
// "163333 (1080p).mp4",
// "166808 (540p).mp4",
// "178732 (1080p).mp4",
// "archive",
// "file.rs",
// "no copyright.pdf",
// "pexels-alex-fu-1302436.jpg",
// "pexels-alexander-grey-1191710.jpg",
// "pexels-benjamin-suter-2362002.jpg",
// "pexels-blaque-x-863963.jpg",
// "pexels-brakou-abdelghani-1723637.jpg",
// "pexels-chevanon-photography-1335971.jpg",
// "pexels-craig-adderley-1563356.jpg",
// "pexels-danne-516541.jpg",
// "pexels-eberhard-grossgasteiger-443446.jpg",
// "pexels-egil-sjøholt-1906658.jpg",
// "pexels-felix-mittermeier-2832041.jpg",
// "pexels-gabriel-peter-719396.jpg",
// "pexels-james-wheeler-1519088.jpg",
// "pexels-jonas-kakaroto-736230.jpg",
// "pexels-katie-burandt-1212693.jpg",
// "pexels-marta-branco-1173576.jpg",
// "pexels-matthew-montrone-1324803.jpg",
// "pexels-max-andrey-1366630.jpg",
// "pexels-nick-collins-1266741.jpg",
// "pexels-oliver-sjöström-1433052.jpg",
// "pexels-photomix-company-1002725.jpg",
// "pexels-pixabay-15239.jpg",
// "pexels-pixabay-33045.jpg",
// "pexels-pixabay-33101.jpg",
// "pexels-pixabay-33109.jpg",
// "pexels-pixabay-36717.jpg",
// "pexels-pixabay-36729.jpg",
// "pexels-pixabay-36762.jpg",
// "pexels-pixabay-45911.jpg",
// "pexels-pixabay-47334.jpg",
// "pexels-pixabay-50594.jpg",
// "pexels-pixabay-59990.jpg",
// "pexels-pixabay-60597.jpg",
// "pexels-pixabay-68507.jpg",
// "pexels-pixabay-158536.jpg",
// "pexels-pixabay-207088.jpg",
// "pexels-pixabay-327509.jpg",
// "pexels-pixabay-358457.jpg",
// "pexels-pixabay-372166.jpg",
// "pexels-pixabay-459203.jpg",
// "pexels-sevenstorm-juhaszimrus-891030.jpg",
// "pexels-steve-johnson-1266808.jpg",
// "pexels-suneo-103573.jpg",
// "pexels-tetyana-kovyrina-937980.jpg",
// "pexels-valeria-boltneva-1484657.jpg",
// "pexels-vlad-chețan-2604929.jpg",
// "pexels-wang-teck-heng-117139.jpg",
// "pexels-yuliya-strizhkina-1198802.jpg",
// "precache.rs",
// "scheduler.rs",
// "Symbols-0.73.0-x64.zip",
// "tasks.rs",
// ];
//
// {
// let mut large1 = files.repeat(2000);
// let mut large2 = files.repeat(2000);
//
// let now = Instant::now();
// large1.sort_unstable_by(|a, b| natord::compare_ignore_case(a, b));
// println!("natord crate (insensitive) - Elapsed: {:.2?}", now.elapsed());
//
// let now = Instant::now();
// large2.sort_unstable_by(|a, b| natsort(a, b, true));
// println!("Yazi (insensitive) - Elapsed: {:.2?}", now.elapsed());
// }
// println!();
// {
// let mut large1 = files.repeat(2000);
// let mut large2 = files.repeat(2000);
//
// let now = Instant::now();
// large1.sort_unstable_by(|a, b| natord::compare(a, b));
// println!("natord crate (sensitive) - Elapsed: {:.2?}", now.elapsed());
//
// let now = Instant::now();
// large2.sort_unstable_by(|a, b| natsort(a, b, false));
// println!("Yazi (sensitive) - Elapsed: {:.2?}", now.elapsed());
// }
// }
}