Skip to content

Commit

Permalink
split: implement round-robin arg to --number
Browse files Browse the repository at this point in the history
Implement distributing lines of a file in a round-robin manner to a
specified number of chunks. For example,

    $ (seq 1 10 | split -n r/3) && head -v xa[abc]
    ==> xaa <==
    1
    4
    7
    10

    ==> xab <==
    2
    5
    8

    ==> xac <==
    3
    6
    9
  • Loading branch information
jfinkels committed May 21, 2022
1 parent d921073 commit 26e1583
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 0 deletions.
43 changes: 43 additions & 0 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,46 @@ where
Ok(())
}

/// Distribute the lines of `reader` over `num_chunks` outputs, round-robin.
///
/// Line `i` (counting from zero) is written to output `i % num_chunks`, so
/// the first output receives lines 0, `num_chunks`, `2 * num_chunks`, ...
/// One output is instantiated per chunk before any input is read, which
/// means every output exists even when the input has fewer lines than
/// chunks.
///
/// Lines are copied as raw bytes including their `\n` terminator. The input
/// therefore does not need to be valid UTF-8, `\r\n` endings are kept
/// intact, and a final line without a terminator is written without one.
///
/// # Errors
///
/// Returns an error if
/// * `num_chunks` is zero or does not fit in a `usize`,
/// * the filename iterator cannot produce enough output names,
/// * reading from `reader` fails, or
/// * writing to or flushing any output fails.
fn split_into_n_chunks_by_line_round_robin<R>(
    settings: &Settings,
    reader: &mut R,
    num_chunks: u64,
) -> UResult<()>
where
    R: BufRead,
{
    // With zero chunks there is no output a line could be assigned to;
    // report that instead of dividing by zero below.
    if num_chunks == 0 {
        return Err(USimpleError::new(1, "invalid number of chunks: 0"));
    }
    // The chunk count is used to index the writer list, so it must fit in
    // `usize` on the current platform.
    let num_chunks: usize = num_chunks
        .try_into()
        .map_err(|_| USimpleError::new(1, "number of chunks too large"))?;

    // This object is responsible for creating the filename for each chunk.
    let mut filename_iterator = FilenameIterator::new(
        &settings.prefix,
        &settings.additional_suffix,
        settings.suffix_length,
        settings.suffix_type,
    );

    // Create one writer for each chunk. This will create each
    // of the underlying files (if not in `--filter` mode).
    let mut writers = vec![];
    for _ in 0..num_chunks {
        let filename = filename_iterator
            .next()
            .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
        let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
        writers.push(writer);
    }

    // Reuse a single byte buffer for every line. `read_until` keeps the
    // terminator in the buffer, so the bytes are forwarded unchanged.
    let mut line = Vec::new();
    // Index of the writer that receives the next line. It wraps around at
    // `num_chunks`, so it is always a valid index into `writers`.
    let mut target = 0;
    loop {
        line.clear();
        if reader.read_until(b'\n', &mut line)? == 0 {
            // End of input.
            break;
        }
        writers[target].write_all(&line)?;
        target = (target + 1) % num_chunks;
    }

    // Flush explicitly so that a failure while writing out buffered data is
    // reported to the caller rather than being lost when the writers drop.
    for writer in &mut writers {
        writer.flush()?;
    }

    Ok(())
}

fn split(settings: &Settings) -> UResult<()> {
let mut reader = BufReader::new(if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read>
Expand All @@ -1201,6 +1241,9 @@ fn split(settings: &Settings) -> UResult<()> {
let chunk_number = chunk_number - 1;
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
}
Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
}
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
Strategy::Lines(chunk_size) => {
let mut writer = LineChunkWriter::new(chunk_size, settings)
Expand Down
16 changes: 16 additions & 0 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,3 +655,19 @@ fn test_line_bytes_no_empty_file() {
assert_eq!(at.read("xaj"), "4");
assert!(!at.plus("xak").exists());
}

/// `-n r/2` must deal the input lines alternately into `xaa` and `xab`.
#[test]
fn test_round_robin() {
    let (at, mut ucmd) = at_and_ucmd!();

    ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();

    // Output file name paired with the exact contents it must hold.
    let expectations = [("xaa", "1\n3\n5\n"), ("xab", "2\n4\n")];
    for &(name, expected) in &expectations {
        let mut actual = String::new();
        at.open(name).read_to_string(&mut actual).unwrap();
        assert_eq!(actual, expected);
    }
}

0 comments on commit 26e1583

Please sign in to comment.