Merge branch 'copy-packetline'
Byron committed Apr 9, 2024
2 parents 68d1a29 + ebb6ef5 commit a9b783d
Showing 22 changed files with 2,286 additions and 3 deletions.
5 changes: 4 additions & 1 deletion .gitattributes
@@ -1,5 +1,8 @@
**/generated-archives/*.tar.xz filter=lfs-disabled diff=lfs merge=lfs -text

# assure line feeds don't interfere with our working copy hash
**/tests/fixtures/**/*.sh text crlf=input eol=lf
/justfile text crlf=input eol=lf

# have GitHub treat the gix-packetline-blocking src copy as auto-generated
gix-packetline-blocking/src/ linguist-generated=true
32 changes: 31 additions & 1 deletion .github/workflows/ci.yml
@@ -176,7 +176,7 @@ jobs:
# Let's not fail CI for this, it will fail locally often enough, and a crate a little bigger
# than allowed is no problem either if it comes to that.
just check-size || true
cargo-deny:
runs-on: ubuntu-latest
strategy:
@@ -193,6 +193,7 @@ jobs:
- uses: EmbarkStudios/cargo-deny-action@v1
with:
command: check ${{ matrix.checks }}

wasm:
name: WebAssembly
runs-on: ubuntu-latest
@@ -213,3 +214,32 @@ jobs:
name: crates with 'wasm' feature
- run: cd gix-pack && cargo build --all-features --target ${{ matrix.target }}
name: gix-pack with all features (including wasm)

check-packetline:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
# We consider this script read-only and its effect is the same everywhere.
# However, when changes are made to `etc/copy-packetline.sh`, re-enable the other platforms for testing.
# - macos-latest
# - windows-latest
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
- name: Check that working tree is initially clean
run: |
set -x
git status
git diff --exit-code
- name: Regenerate gix-packetline-blocking/src
run: etc/copy-packetline.sh
- name: Check that gix-packetline-blocking/src was already up to date
run: |
set -x
git status
git diff --exit-code
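
For reference, the job above just regenerates the copied sources and fails if that leaves the working tree dirty. A rough local equivalent of the check, sketched here rather than taken from the commit:

etc/copy-packetline.sh    # regenerate gix-packetline-blocking/src
git status                # see what, if anything, changed
git diff --exit-code      # exits non-zero if the regenerated copy differs
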
152 changes: 152 additions & 0 deletions etc/copy-packetline.sh
@@ -0,0 +1,152 @@
#!/bin/bash

set -euC -o pipefail

readonly input_dir='gix-packetline/src'
readonly output_parent_dir='gix-packetline-blocking'
readonly output_dir="$output_parent_dir/src"

function fail () {
printf '%s: error: %s\n' "$0" "$1" >&2
exit 1
}

function chdir_toplevel () {
local root_padded root

# Find the working tree's root. (Padding covers the trailing-newline case.)
root_padded="$(git rev-parse --show-toplevel && echo -n .)" ||
fail 'git-rev-parse failed to find top-level dir'
root="${root_padded%$'\n.'}"

cd -- "$root"
}

function merging () {
local git_dir_padded git_dir

# Find the .git directory. (Padding covers the trailing-newline case.)
git_dir_padded="$(git rev-parse --git-dir && echo -n .)" ||
fail 'git-rev-parse failed to find git dir'
git_dir="${git_dir_padded%$'\n.'}"

test -e "$git_dir/MERGE_HEAD"
}

function output_dir_status () {
git status --porcelain --ignored=traditional -- "$output_dir" ||
fail 'git-status failed'
}

function check_output_dir () {
if ! test -e "$output_dir"; then
# The destination does not exist on disk, so nothing will be lost. Proceed.
return
fi

if merging; then
# In a merge, it would be confusing to replace anything at the destination.
if output_dir_status | grep -q '^'; then
fail 'output location exists, and a merge is in progress'
fi
else
# We can lose data if anything of value at the destination is not in the
# index. (This includes unstaged deletions, for two reasons. We could lose
# track of which files had been deleted. More importantly, replacing a
# staged symlink or regular file with an unstaged directory is shown by
# git-status as only a deletion, even if the directory is non-empty.)
if output_dir_status | grep -q '^.[^ ]'; then
fail 'output location exists, with unstaged changes or ignored files'
fi
fi
}

function first_line_ends_crlf () {
# This is tricky to check portably. In Cygwin-like environments including
# MSYS2 and Git Bash, most text processing tools, including awk, sed, and
# grep, automatically ignore \r before \n. Some ignore \r everywhere. Some
# can be told to keep \r, but in non-portable ways that may affect other
# implementations. Bash ignores \r in some places even without "-o igncr",
# and ignores \r even more with it, including in all text from command
# substitution. Simple checks may be non-portable to other OSes. Fortunately,
# tools that treat input as binary data are exempt (even cat, but "-v" is
# non-portable, and unreliable in general because lines can end in "^M").
# This may be doable without od, by using tr more heavily, but it could be
# hard to avoid false positives with unexpected characters or \r without \n.

head -n 1 -- "$1" | # Get the longest prefix with no non-trailing \n byte.
od -An -ta | # Represent all bytes symbolically, without addresses.
tr -sd '\n' ' ' | # Scrunch into one line, so "cr nl" appears as such.
grep -q 'cr nl$' # Check if the result signifies a \r\n line ending.
}

function make_header () {
local input_file endline

input_file="$1"
endline="$2"

# shellcheck disable=SC2016 # The backticks are intentionally literal.
printf '// DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \
"$input_file" "$endline" "$endline"
}

function copy_with_header () {
local input_file output_file endline

input_file="$1"
output_file="$2"

if first_line_ends_crlf "$input_file"; then
endline=$'\r\n'
else
endline=$'\n'
fi

make_header "$input_file" "$endline" | cat -- - "$input_file" >"$output_file"
}

function generate_one () {
local input_file output_file

input_file="$1"
output_file="$output_dir${input_file#"$input_dir"}"

if test -d "$input_file"; then
mkdir -p -- "$output_file"
elif test -L "$input_file"; then
# Cover this case separately, for more useful error messages.
fail "input file is symbolic link: $input_file"
elif ! test -f "$input_file"; then
# This covers less common kinds of files we can't or shouldn't process.
fail "input file neither regular file nor directory: $input_file"
elif [[ "$input_file" =~ \.rs$ ]]; then
copy_with_header "$input_file" "$output_file"
else
fail "input file not named as Rust source code: $input_file"
fi
}

function generate_all () {
local input_file

if ! test -d "$input_dir"; then
fail "no input directory: $input_dir"
fi
if ! test -d "$output_parent_dir"; then
fail "no output parent directory: $output_parent_dir"
fi
check_output_dir

rm -rf -- "$output_dir" # It may be a directory, symlink, or regular file.
if test -e "$output_dir"; then
fail 'unable to remove output location'
fi

find "$input_dir" -print0 | while IFS= read -r -d '' input_file; do
generate_one "$input_file"
done
}

chdir_toplevel
generate_all
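
A note on the guard in check_output_dir above: `git status --porcelain` prints two status columns, a space, then the path, and the grep patterns key on the second column. An illustration with made-up paths (not output from this commit):

M  staged-modification.rs     <- second column blank: tolerated outside a merge
 M unstaged-modification.rs   <- second column non-blank: matches '^.[^ ]', script aborts
?? untracked.rs               <- matches '^.[^ ]', script aborts
!! ignored.log                <- listed due to --ignored=traditional, script aborts

During a merge, the stricter `grep -q '^'` aborts on any output at all.
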
1 change: 0 additions & 1 deletion gix-packetline-blocking/src

This file was deleted.

148 changes: 148 additions & 0 deletions gix-packetline-blocking/src/decode.rs
@@ -0,0 +1,148 @@
// DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it.

use bstr::BString;

use crate::{PacketLineRef, DELIMITER_LINE, FLUSH_LINE, MAX_DATA_LEN, MAX_LINE_LEN, RESPONSE_END_LINE, U16_HEX_BYTES};

/// The error used in the [`decode`][mod@crate::decode] module
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Failed to decode the first four hex bytes indicating the line length: {err}")]
HexDecode { err: String },
#[error("The data received claims to be larger than the maximum allowed size: got {length_in_bytes}, exceeds {MAX_DATA_LEN}")]
DataLengthLimitExceeded { length_in_bytes: usize },
#[error("Received an invalid empty line")]
DataIsEmpty,
#[error("Received an invalid line of length 3")]
InvalidLineLength,
#[error("{data:?} - consumed {bytes_consumed} bytes")]
Line { data: BString, bytes_consumed: usize },
#[error("Needing {bytes_needed} additional bytes to decode the line successfully")]
NotEnoughData { bytes_needed: usize },
}

///
#[allow(clippy::empty_docs)]
pub mod band {
/// The error used in [`PacketLineRef::decode_band()`][super::PacketLineRef::decode_band()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("attempt to decode a non-side channel line or input was malformed: {band_id}")]
InvalidSideBand { band_id: u8 },
#[error("attempt to decode a non-data line into a side-channel band")]
NonDataLine,
}
}

/// A utility return type to support incremental parsing of packet lines.
#[derive(Debug, Clone)]
pub enum Stream<'a> {
/// Indicate a single packet line was parsed completely
Complete {
/// The parsed packet line
line: PacketLineRef<'a>,
/// The amount of bytes consumed from input
bytes_consumed: usize,
},
/// A packet line could not yet be parsed due to missing bytes
Incomplete {
/// The amount of additional bytes needed for the parsing to complete
bytes_needed: usize,
},
}

/// The result of [`hex_prefix()`] indicating either a special packet line or the amount of wanted bytes
pub enum PacketLineOrWantedSize<'a> {
/// The special kind of packet line decoded from the hex prefix. It never contains actual data.
Line(PacketLineRef<'a>),
/// The amount of bytes indicated by the hex prefix of the packet line.
Wanted(u16),
}

/// Decode the `four_bytes` packet line prefix provided in hexadecimal form and check it for validity.
pub fn hex_prefix(four_bytes: &[u8]) -> Result<PacketLineOrWantedSize<'_>, Error> {
debug_assert_eq!(four_bytes.len(), 4, "need four hex bytes");
for (line_bytes, line_type) in &[
(FLUSH_LINE, PacketLineRef::Flush),
(DELIMITER_LINE, PacketLineRef::Delimiter),
(RESPONSE_END_LINE, PacketLineRef::ResponseEnd),
] {
if four_bytes == *line_bytes {
return Ok(PacketLineOrWantedSize::Line(*line_type));
}
}

let mut buf = [0u8; U16_HEX_BYTES / 2];
faster_hex::hex_decode(four_bytes, &mut buf).map_err(|err| Error::HexDecode { err: err.to_string() })?;
let wanted_bytes = u16::from_be_bytes(buf);

if wanted_bytes == 3 {
return Err(Error::InvalidLineLength);
}
if wanted_bytes == 4 {
return Err(Error::DataIsEmpty);
}
debug_assert!(
wanted_bytes as usize > U16_HEX_BYTES,
"by now there should be more wanted bytes than prefix bytes"
);
Ok(PacketLineOrWantedSize::Wanted(wanted_bytes - U16_HEX_BYTES as u16))
}

/// Obtain a `PacketLine` from `data` after assuring `data` is small enough to fit.
pub fn to_data_line(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
if data.len() > MAX_LINE_LEN {
return Err(Error::DataLengthLimitExceeded {
length_in_bytes: data.len(),
});
}

Ok(PacketLineRef::Data(data))
}

/// Decode `data` as packet line while reporting whether the data is complete or not using a [`Stream`].
pub fn streaming(data: &[u8]) -> Result<Stream<'_>, Error> {
let data_len = data.len();
if data_len < U16_HEX_BYTES {
return Ok(Stream::Incomplete {
bytes_needed: U16_HEX_BYTES - data_len,
});
}
let wanted_bytes = match hex_prefix(&data[..U16_HEX_BYTES])? {
PacketLineOrWantedSize::Wanted(s) => s as usize,
PacketLineOrWantedSize::Line(line) => {
return Ok(Stream::Complete {
line,
bytes_consumed: 4,
})
}
} + U16_HEX_BYTES;
if wanted_bytes > MAX_LINE_LEN {
return Err(Error::DataLengthLimitExceeded {
length_in_bytes: wanted_bytes,
});
}
if data_len < wanted_bytes {
return Ok(Stream::Incomplete {
bytes_needed: wanted_bytes - data_len,
});
}

Ok(Stream::Complete {
line: to_data_line(&data[U16_HEX_BYTES..wanted_bytes])?,
bytes_consumed: wanted_bytes,
})
}

/// Decode an entire packet line from data or fail.
///
/// Note that failure also happens if there is not enough data to parse a complete packet line, unlike [`streaming()`],
/// which succeeds in that case and states how many more bytes are required.
pub fn all_at_once(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
match streaming(data)? {
Stream::Complete { line, .. } => Ok(line),
Stream::Incomplete { bytes_needed } => Err(Error::NotEnoughData { bytes_needed }),
}
}
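
To show how the copied decode module is used, here is a minimal sketch (not part of this commit). It assumes the crate is referenced as `gix_packetline_blocking` and built with a feature that enables the `decode` module (e.g. `blocking-io`):

use gix_packetline_blocking::{decode, PacketLineRef};

fn main() -> Result<(), decode::Error> {
    // "0006" is the hex length prefix: 6 bytes in total, i.e. 4 prefix bytes plus the payload "a\n".
    match decode::streaming(b"0006a\n")? {
        decode::Stream::Complete { line, bytes_consumed } => {
            assert_eq!(bytes_consumed, 6);
            if let PacketLineRef::Data(data) = line {
                assert_eq!(data, b"a\n");
            }
        }
        // Unlike `all_at_once()`, `streaming()` does not fail on short input; it reports
        // how many more bytes are needed.
        decode::Stream::Incomplete { bytes_needed } => {
            println!("need {bytes_needed} more byte(s)");
        }
    }
    Ok(())
}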