Merge branch 'copy-packetline'
Byron committed Apr 9, 2024
2 parents 68d1a29 + ebb6ef5 commit a9b783d
Showing 22 changed files with 2,286 additions and 3 deletions.
5 changes: 4 additions & 1 deletion .gitattributes
@@ -1,5 +1,8 @@
**/generated-archives/*.tar.xz filter=lfs-disabled diff=lfs merge=lfs -text

# assure line feeds don't interfere with our working copy hash
**/tests/fixtures/**/*.sh text crlf=input eol=lf
/justfile text crlf=input eol=lf

# have GitHub treat the gix-packetline-blocking src copy as auto-generated
gix-packetline-blocking/src/ linguist-generated=true
32 changes: 31 additions & 1 deletion .github/workflows/ci.yml
@@ -176,7 +176,7 @@ jobs:
# Let's not fail CI for this, it will fail locally often enough, and a crate a little bigger
# than allowed is no problem either if it comes to that.
just check-size || true
cargo-deny:
runs-on: ubuntu-latest
strategy:
@@ -193,6 +193,7 @@ jobs:
- uses: EmbarkStudios/cargo-deny-action@v1
with:
command: check ${{ matrix.checks }}

wasm:
name: WebAssembly
runs-on: ubuntu-latest
@@ -213,3 +214,32 @@ jobs:
name: crates with 'wasm' feature
- run: cd gix-pack && cargo build --all-features --target ${{ matrix.target }}
name: gix-pack with all features (including wasm)

check-packetline:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
# We consider this script read-only and its effect is the same everywhere.
# However, when changes are made to `etc/copy-packetline.sh`, re-enable the other platforms for testing.
# - macos-latest
# - windows-latest
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
- name: Check that working tree is initially clean
run: |
set -x
git status
git diff --exit-code
- name: Regenerate gix-packetline-blocking/src
run: etc/copy-packetline.sh
- name: Check that gix-packetline-blocking/src was already up to date
run: |
set -x
git status
git diff --exit-code
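
For reference, the job above just regenerates the copied sources and fails if that leaves the working tree dirty. A rough local equivalent of the check, sketched here rather than taken from the commit:

etc/copy-packetline.sh    # regenerate gix-packetline-blocking/src
git status                # see what, if anything, changed
git diff --exit-code      # exits non-zero if the regenerated copy differs
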
152 changes: 152 additions & 0 deletions etc/copy-packetline.sh
@@ -0,0 +1,152 @@
#!/bin/bash

set -euC -o pipefail

readonly input_dir='gix-packetline/src'
readonly output_parent_dir='gix-packetline-blocking'
readonly output_dir="$output_parent_dir/src"

function fail () {
printf '%s: error: %s\n' "$0" "$1" >&2
exit 1
}

function chdir_toplevel () {
local root_padded root

# Find the working tree's root. (Padding covers the trailing-newline case.)
root_padded="$(git rev-parse --show-toplevel && echo -n .)" ||
fail 'git-rev-parse failed to find top-level dir'
root="${root_padded%$'\n.'}"

cd -- "$root"
}

function merging () {
local git_dir_padded git_dir

# Find the .git directory. (Padding covers the trailing-newline case.)
git_dir_padded="$(git rev-parse --git-dir && echo -n .)" ||
fail 'git-rev-parse failed to find git dir'
git_dir="${git_dir_padded%$'\n.'}"

test -e "$git_dir/MERGE_HEAD"
}

function output_dir_status () {
git status --porcelain --ignored=traditional -- "$output_dir" ||
fail 'git-status failed'
}

function check_output_dir () {
if ! test -e "$output_dir"; then
# The destination does not exist on disk, so nothing will be lost. Proceed.
return
fi

if merging; then
# In a merge, it would be confusing to replace anything at the destination.
if output_dir_status | grep -q '^'; then
fail 'output location exists, and a merge is in progress'
fi
else
# We can lose data if anything of value at the destination is not in the
# index. (This includes unstaged deletions, for two reasons. We could lose
# track of which files had been deleted. More importantly, replacing a
# staged symlink or regular file with an unstaged directory is shown by
# git-status as only a deletion, even if the directory is non-empty.)
if output_dir_status | grep -q '^.[^ ]'; then
fail 'output location exists, with unstaged changes or ignored files'
fi
fi
}

function first_line_ends_crlf () {
# This is tricky to check portably. In Cygwin-like environments including
# MSYS2 and Git Bash, most text processing tools, including awk, sed, and
# grep, automatically ignore \r before \n. Some ignore \r everywhere. Some
# can be told to keep \r, but in non-portable ways that may affect other
# implementations. Bash ignores \r in some places even without "-o igncr",
# and ignores \r even more with it, including in all text from command
# substitution. Simple checks may be non-portable to other OSes. Fortunately,
# tools that treat input as binary data are exempt (even cat, but "-v" is
# non-portable, and unreliable in general because lines can end in "^M").
# This may be doable without od, by using tr more heavily, but it could be
# hard to avoid false positives with unexpected characters or \r without \n.

head -n 1 -- "$1" | # Get the longest prefix with no non-trailing \n byte.
od -An -ta | # Represent all bytes symbolically, without addresses.
tr -sd '\n' ' ' | # Scrunch into one line, so "cr nl" appears as such.
grep -q 'cr nl$' # Check if the result signifies a \r\n line ending.
}

function make_header () {
local input_file endline

input_file="$1"
endline="$2"

# shellcheck disable=SC2016 # The backticks are intentionally literal.
printf '// DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \
"$input_file" "$endline" "$endline"
}

function copy_with_header () {
local input_file output_file endline

input_file="$1"
output_file="$2"

if first_line_ends_crlf "$input_file"; then
endline=$'\r\n'
else
endline=$'\n'
fi

make_header "$input_file" "$endline" | cat -- - "$input_file" >"$output_file"
}

function generate_one () {
local input_file output_file

input_file="$1"
output_file="$output_dir${input_file#"$input_dir"}"

if test -d "$input_file"; then
mkdir -p -- "$output_file"
elif test -L "$input_file"; then
# Cover this case separately, for more useful error messages.
fail "input file is symbolic link: $input_file"
elif ! test -f "$input_file"; then
# This covers less common kinds of files we can't or shouldn't process.
fail "input file neither regular file nor directory: $input_file"
elif [[ "$input_file" =~ \.rs$ ]]; then
copy_with_header "$input_file" "$output_file"
else
fail "input file not named as Rust source code: $input_file"
fi
}

function generate_all () {
local input_file

if ! test -d "$input_dir"; then
fail "no input directory: $input_dir"
fi
if ! test -d "$output_parent_dir"; then
fail "no output parent directory: $output_parent_dir"
fi
check_output_dir

rm -rf -- "$output_dir" # It may be a directory, symlink, or regular file.
if test -e "$output_dir"; then
fail 'unable to remove output location'
fi

find "$input_dir" -print0 | while IFS= read -r -d '' input_file; do
generate_one "$input_file"
done
}

chdir_toplevel
generate_all
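
A note on the guard in check_output_dir above: `git status --porcelain` prints two status columns, a space, then the path, and the grep patterns key on the second column. An illustration with made-up paths (not output from this commit):

M  staged-modification.rs     <- second column blank: tolerated outside a merge
 M unstaged-modification.rs   <- second column non-blank: matches '^.[^ ]', script aborts
?? untracked.rs               <- matches '^.[^ ]', script aborts
!! ignored.log                <- listed due to --ignored=traditional, script aborts

During a merge, the stricter `grep -q '^'` aborts on any output at all.
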
1 change: 0 additions & 1 deletion gix-packetline-blocking/src

This file was deleted.

148 changes: 148 additions & 0 deletions gix-packetline-blocking/src/decode.rs
@@ -0,0 +1,148 @@
// DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it.

use bstr::BString;

use crate::{PacketLineRef, DELIMITER_LINE, FLUSH_LINE, MAX_DATA_LEN, MAX_LINE_LEN, RESPONSE_END_LINE, U16_HEX_BYTES};

/// The error used in the [`decode`][mod@crate::decode] module
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Failed to decode the first four hex bytes indicating the line length: {err}")]
HexDecode { err: String },
#[error("The data received claims to be larger than the maximum allowed size: got {length_in_bytes}, exceeds {MAX_DATA_LEN}")]
DataLengthLimitExceeded { length_in_bytes: usize },
#[error("Received an invalid empty line")]
DataIsEmpty,
#[error("Received an invalid line of length 3")]
InvalidLineLength,
#[error("{data:?} - consumed {bytes_consumed} bytes")]
Line { data: BString, bytes_consumed: usize },
#[error("Needing {bytes_needed} additional bytes to decode the line successfully")]
NotEnoughData { bytes_needed: usize },
}

///
#[allow(clippy::empty_docs)]
pub mod band {
/// The error used in [`PacketLineRef::decode_band()`][super::PacketLineRef::decode_band()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("attempt to decode a non-side channel line or input was malformed: {band_id}")]
InvalidSideBand { band_id: u8 },
#[error("attempt to decode a non-data line into a side-channel band")]
NonDataLine,
}
}

/// A utility return type to support incremental parsing of packet lines.
#[derive(Debug, Clone)]
pub enum Stream<'a> {
/// Indicate a single packet line was parsed completely
Complete {
/// The parsed packet line
line: PacketLineRef<'a>,
/// The amount of bytes consumed from input
bytes_consumed: usize,
},
/// A packet line could not yet be parsed due to missing bytes
Incomplete {
/// The amount of additional bytes needed for the parsing to complete
bytes_needed: usize,
},
}

/// The result of [`hex_prefix()`] indicating either a special packet line or the amount of wanted bytes
pub enum PacketLineOrWantedSize<'a> {
/// The special kind of packet line decoded from the hex prefix. It never contains actual data.
Line(PacketLineRef<'a>),
/// The amount of bytes indicated by the hex prefix of the packet line.
Wanted(u16),
}

/// Decode the `four_bytes` packet line prefix provided in hexadecimal form and check it for validity.
pub fn hex_prefix(four_bytes: &[u8]) -> Result<PacketLineOrWantedSize<'_>, Error> {
debug_assert_eq!(four_bytes.len(), 4, "need four hex bytes");
for (line_bytes, line_type) in &[
(FLUSH_LINE, PacketLineRef::Flush),
(DELIMITER_LINE, PacketLineRef::Delimiter),
(RESPONSE_END_LINE, PacketLineRef::ResponseEnd),
] {
if four_bytes == *line_bytes {
return Ok(PacketLineOrWantedSize::Line(*line_type));
}
}

let mut buf = [0u8; U16_HEX_BYTES / 2];
faster_hex::hex_decode(four_bytes, &mut buf).map_err(|err| Error::HexDecode { err: err.to_string() })?;
let wanted_bytes = u16::from_be_bytes(buf);

if wanted_bytes == 3 {
return Err(Error::InvalidLineLength);
}
if wanted_bytes == 4 {
return Err(Error::DataIsEmpty);
}
debug_assert!(
wanted_bytes as usize > U16_HEX_BYTES,
"by now there should be more wanted bytes than prefix bytes"
);
Ok(PacketLineOrWantedSize::Wanted(wanted_bytes - U16_HEX_BYTES as u16))
}

/// Obtain a `PacketLine` from `data` after assuring `data` is small enough to fit.
pub fn to_data_line(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
if data.len() > MAX_LINE_LEN {
return Err(Error::DataLengthLimitExceeded {
length_in_bytes: data.len(),
});
}

Ok(PacketLineRef::Data(data))
}

/// Decode `data` as packet line while reporting whether the data is complete or not using a [`Stream`].
pub fn streaming(data: &[u8]) -> Result<Stream<'_>, Error> {
let data_len = data.len();
if data_len < U16_HEX_BYTES {
return Ok(Stream::Incomplete {
bytes_needed: U16_HEX_BYTES - data_len,
});
}
let wanted_bytes = match hex_prefix(&data[..U16_HEX_BYTES])? {
PacketLineOrWantedSize::Wanted(s) => s as usize,
PacketLineOrWantedSize::Line(line) => {
return Ok(Stream::Complete {
line,
bytes_consumed: 4,
})
}
} + U16_HEX_BYTES;
if wanted_bytes > MAX_LINE_LEN {
return Err(Error::DataLengthLimitExceeded {
length_in_bytes: wanted_bytes,
});
}
if data_len < wanted_bytes {
return Ok(Stream::Incomplete {
bytes_needed: wanted_bytes - data_len,
});
}

Ok(Stream::Complete {
line: to_data_line(&data[U16_HEX_BYTES..wanted_bytes])?,
bytes_consumed: wanted_bytes,
})
}

/// Decode an entire packet line from data or fail.
///
/// Note that failure also happens if there is not enough data to parse a complete packet line, unlike [`streaming()`],
/// which succeeds in that case and states how many more bytes are required.
pub fn all_at_once(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
match streaming(data)? {
Stream::Complete { line, .. } => Ok(line),
Stream::Incomplete { bytes_needed } => Err(Error::NotEnoughData { bytes_needed }),
}
}
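
To show how the copied decode module is used, here is a minimal sketch (not part of this commit). It assumes the crate is referenced as `gix_packetline_blocking` and built with a feature that enables the `decode` module (e.g. `blocking-io`):

use gix_packetline_blocking::{decode, PacketLineRef};

fn main() -> Result<(), decode::Error> {
    // "0006" is the hex length prefix: 6 bytes in total, i.e. 4 prefix bytes plus the payload "a\n".
    match decode::streaming(b"0006a\n")? {
        decode::Stream::Complete { line, bytes_consumed } => {
            assert_eq!(bytes_consumed, 6);
            if let PacketLineRef::Data(data) = line {
                assert_eq!(data, b"a\n");
            }
        }
        // Unlike `all_at_once()`, `streaming()` does not fail on short input; it reports
        // how many more bytes are needed.
        decode::Stream::Incomplete { bytes_needed } => {
            println!("need {bytes_needed} more byte(s)");
        }
    }
    Ok(())
}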