From c957ab86c003483c957f2f7a8396635ac0640cf6 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sat, 6 Apr 2024 21:20:06 -0400 Subject: [PATCH 01/19] Start on auto-generating gix-packetline-blocking/src By copying the files with added headers, instead of having a symlink. Right now this is entirely as a shell script. It may be beneficial to rewrite at least some parts, especially those that are unnatural to implement portably as a shell script even given Bash, in Rust. --- .gitattributes | 5 +- etc/copy-packetline.sh | 147 +++++++++++++++++++++++++++++++++++++++++ justfile | 4 ++ 3 files changed, 155 insertions(+), 1 deletion(-) create mode 100755 etc/copy-packetline.sh diff --git a/.gitattributes b/.gitattributes index 21a8a6960ba..c3e94f4dbf1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,8 @@ **/generated-archives/*.tar.xz filter=lfs-disabled diff=lfs merge=lfs -text -# assure line feeds don't interfere with our working copy hash +# assure line feeds don't interfere with our working copy hash **/tests/fixtures/**/*.sh text crlf=input eol=lf /justfile text crlf=input eol=lf + +# have GitHub treat the gix-packetline-blocking src copy as auto-generated +gix-packetline-blocking/src/ linguist-generated=true diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh new file mode 100755 index 00000000000..f8bd57d9abd --- /dev/null +++ b/etc/copy-packetline.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +set -euC -o pipefail + +function usage() { + local name + + name="$(basename -- "$0")" + printf '%s [--all] regenerate gix-packetline-blocking source\n' "$name" + printf '%s --file {path} regenerate a single file (avoid; prefer --all)\n' "$name" + printf '%s --help print this message\n' "$name" +} + +function fail () { + printf '%s: error: %s\n' "$0" "$1" >&2 + exit 1 +} + +function status () { + git status --short --ignored=traditional -- "$@" +} + +function avoids_pattern () ( + set +e # Temporary, since the function body is in ( ). + grep -q "$@" + test "$?" -eq 1 # Distinguish no-match from error. +) + +function indent () { + sed 's/^/ /' +} + +function generate_all () { + local root failures + + root="$(git rev-parse --show-toplevel)/." # /. in case name ends in newline. + cd -- "$root" + + if ! test -d gix-packetline/src; then + fail 'no source directory: gix-packetline/src' + fi + if ! test -d gix-packetline-blocking; then + fail 'no target parent directory: gix-packetline-blocking' + fi + + # FIXME: This misinterprets the status when in an unresolved merge conflict! + if ! status gix-packetline-blocking/src | avoids_pattern '^.[^ ]'; then + fail 'target has unstaged changes or contains ignored files' + fi + + rm -rf gix-packetline-blocking/src # No trailing /, as it may be a symlink. + if test -e gix-packetline-blocking/src; then + fail 'unable to remove target' + fi + + failures="$(find gix-packetline/src/ -exec "$0" --file {} \; -o -print)" + + # If we get here, traversal succeeded, but perhaps some generations failed. + if test -n "$failures"; then + fail $'failed to generate from:\n'"$(indent <<<"$failures")" + fi +} + +function first_line_ends_crlf () { + # This is tricky to check portably. On Windows in Cygwin-like environments + # including MSYS2 and Git Bash, most text processing tools, including awk, + # sed, and grep, automatically substitute \n for \r\n. Some can be told not + # to, but in non-portable ways that may affect other implementations. Bash + # does so on command substitution and other input, and optionally more often. + # Easy ways to check are often non-portable to other OSes. Fortunately, tools + # that treat input as binary data are exempt (including cat, but "-v" is not + # portable, and it's unreliable in general as lines can end in "^M"). This + # may be doable without od, by using tr more heavily, but it could be hard to + # avoid false positives with unexpected characters, or with \r not before \n. + + head -n 1 -- "$1" | # Get the longest prefix with no non-trailing \n byte. + od -An -ta | # Show all bytes symbolically, without addresses. + tr -sd '\n' ' ' | # Scrunch into one line, so "cr nl" appears as such. + grep -q 'cr nl$' # Check if the result signifies a \r\n line ending. +} + +function make_header () { + local source endline + source="$1" + endline="$2" + + # shellcheck disable=SC2016 # The backticks are intentionally literal. + printf '//! DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \ + "$source" "$endline" "$endline" +} + +function copy_with_header () { + local source target endline + + source="$1" + target="$2" + + if first_line_ends_crlf "$source"; then + endline=$'\r\n' + else + endline=$'\n' + fi + + make_header "$source" "$endline" | cat - -- "$source" >"$target" +} + +function generate_one () { + local source shared target + + source="$1" + shared="${source#gix-packetline/src/}" + if test "$source" = "$shared"; then + fail "source path seems to be outside gix-packetline/src/: $source" + fi + target="gix-packetline-blocking/src/$shared" + + if test -d "$source"; then + mkdir -p -- "$target" + elif test -L "$source"; then + # Cover this case separately, for more useful error messages. + fail "source file is symbolic link: $source" + elif ! test -f "$source"; then + # This covers less common kinds of files we can't/shouldn't process. + fail "source file neither regular file nor directory: $source" + elif [[ "$source" =~ \.rs$ ]]; then + copy_with_header "$source" "$target" + else + fail "source file not named as Rust source code: $source" + fi +} + +case "$0" in +*{}*) + # Some "find" implementations expand "{}" even inside a larger argument. + fail "can't operate portably with literal {} in command name" + ;; +esac + +if { test "$#" -eq 1 && test "$1" = '--all'; } || test "$#" -eq 0; then + generate_all +elif test "$#" -eq 2 && test "$1" = '--file'; then + generate_one "$2" +elif test "$#" -eq 1 && test "$1" = '--help'; then + usage +else + fail 'unrecognized syntax, try passing only --help for usage' +fi diff --git a/justfile b/justfile index e8890319849..8979a7777fd 100755 --- a/justfile +++ b/justfile @@ -252,3 +252,7 @@ fmt: # Cancel this after the first few seconds, as yanked crates will appear in warnings. find-yanked: cargo install --debug --locked --no-default-features --features max-pure --path . + +# Delete gix-packetline-blocking/src and regenerate from gix-packetline/src +copy-packetline: + ./etc/copy-packetline.sh From 2e837668776651c1624e9fafd32904e1177c7e17 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 7 Apr 2024 12:08:43 -0400 Subject: [PATCH 02/19] Fix script bug where running from a subdirectory didn't work This was always intended to work, but didn't because script subprocesses always used the original command name/path, but this was after changing directory. The script already relies on the repository being organized a particular way, so relying on itself (or another script suitable to do its work) being at the known location is no worse. This also makes it unnecessary to guard against the rare {}-in-path case. This change has the further benefit that if the script is ever successfully invoked via a Windows-style path with "\" separators (which can happen on Windows), its subprocesses can still execute. This also removes an altogether ineffective attempt to support the rare case of a working tree whole top-level directory name ends in a newline character, and instead changes the comment to note that this does work. A few other comments throughout are reworded for clarity as well. It may make sense to change the script more deeply than this (even before replacing part of it with a Rust tool). By sacrificing a small amount of portability, a loop could be used, as find recognizes -print0 in GNU/Linux (via GNU findutils), most BSD systems including macOS, and busybox (thus also Alpine Linux). Unfortunately a recursive glob, which would be even nicer, is not supported by versions of bash that ship with macOS. --- etc/copy-packetline.sh | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index f8bd57d9abd..386565c4248 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -33,7 +33,8 @@ function indent () { function generate_all () { local root failures - root="$(git rev-parse --show-toplevel)/." # /. in case name ends in newline. + # Find the working tree. (NOTE: Wrong if the directory name ends in newline.) + root="$(git rev-parse --show-toplevel)" cd -- "$root" if ! test -d gix-packetline/src; then @@ -53,7 +54,11 @@ function generate_all () { fail 'unable to remove target' fi - failures="$(find gix-packetline/src/ -exec "$0" --file {} \; -o -print)" + failures="$( + find gix-packetline/src/ \ + -exec etc/copy-packetline.sh --file {} \; \ + -o -print + )" # If we get here, traversal succeeded, but perhaps some generations failed. if test -n "$failures"; then @@ -74,7 +79,7 @@ function first_line_ends_crlf () { # avoid false positives with unexpected characters, or with \r not before \n. head -n 1 -- "$1" | # Get the longest prefix with no non-trailing \n byte. - od -An -ta | # Show all bytes symbolically, without addresses. + od -An -ta | # Represent all bytes symbolically, without addresses. tr -sd '\n' ' ' | # Scrunch into one line, so "cr nl" appears as such. grep -q 'cr nl$' # Check if the result signifies a \r\n line ending. } @@ -120,7 +125,7 @@ function generate_one () { # Cover this case separately, for more useful error messages. fail "source file is symbolic link: $source" elif ! test -f "$source"; then - # This covers less common kinds of files we can't/shouldn't process. + # This covers less common kinds of files we can't or shouldn't process. fail "source file neither regular file nor directory: $source" elif [[ "$source" =~ \.rs$ ]]; then copy_with_header "$source" "$target" @@ -129,13 +134,6 @@ function generate_one () { fi } -case "$0" in -*{}*) - # Some "find" implementations expand "{}" even inside a larger argument. - fail "can't operate portably with literal {} in command name" - ;; -esac - if { test "$#" -eq 1 && test "$1" = '--all'; } || test "$#" -eq 0; then generate_all elif test "$#" -eq 2 && test "$1" = '--file'; then From e4e128cf386a56cb8664ce97437664a50f573b6c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 7 Apr 2024 19:12:31 -0400 Subject: [PATCH 03/19] Proceed when target is absent; account for merges This modifies the script so that: - When the target does not exist on disk at all, we proceed, even if the deletion is an unstaged change. - When there is an uncommitted merge, as happens when a conflict is being resolved (and occasionally otherwise), recognize this case (rather than misinterpreting the short status output), proceeding only if there are no changes at all at the target. Even staged changes cause the copy to be cancelled in that case. - Some cumbersomely expressed code is written more clearly, a couple of specific error messages are added for what should be rare failures of git commands, checking for grep failure is removed (grep should not actually fail if used correctly), and the big comment in first_line_ends_crlf is made more accurate. --- etc/copy-packetline.sh | 82 ++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 386565c4248..16599477d3d 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -16,26 +16,58 @@ function fail () { exit 1 } -function status () { - git status --short --ignored=traditional -- "$@" +function chdir_toplevel() { + local root + + # NOTE: We get the wrong directory name, if the name ends in newline. + root="$(git rev-parse --show-toplevel)" || + fail 'git-rev-parse failed to find top-level dir' + + cd -- "$root" +} + +function merging () { + local git_dir + + # NOTE: We get the wrong directory name, if the name ends in newline. + git_dir="$(git rev-parse --git-dir)" || + fail 'git-rev-parse failed to find git dir' + + test -e "$git_dir/MERGE_HEAD" +} + +function target_status () { + git status --short --ignored=traditional -- gix-packetline-blocking/src || + fail 'git-status failed' } -function avoids_pattern () ( - set +e # Temporary, since the function body is in ( ). - grep -q "$@" - test "$?" -eq 1 # Distinguish no-match from error. -) +function check_target () { + if ! test -e "gix-packetline-blocking/src"; then + # The target does not exist on disk, so nothing will be lost. Proceed. + return + fi + + if merging; then + # In a merge, it would be confusing to replace anything at the target. + if target_status | grep -q '^'; then + fail 'target exists, and a merge is in progress' + fi + else + # We can lose data if anything of value at the target is not in the index. + if target_status | grep -q '^.[^ ]'; then + fail 'target exists, with unstaged changes or ignored files' + fi + fi +} function indent () { sed 's/^/ /' } function generate_all () { - local root failures + local failures - # Find the working tree. (NOTE: Wrong if the directory name ends in newline.) - root="$(git rev-parse --show-toplevel)" - cd -- "$root" + chdir_toplevel if ! test -d gix-packetline/src; then fail 'no source directory: gix-packetline/src' @@ -44,12 +76,9 @@ function generate_all () { fail 'no target parent directory: gix-packetline-blocking' fi - # FIXME: This misinterprets the status when in an unresolved merge conflict! - if ! status gix-packetline-blocking/src | avoids_pattern '^.[^ ]'; then - fail 'target has unstaged changes or contains ignored files' - fi + check_target - rm -rf gix-packetline-blocking/src # No trailing /, as it may be a symlink. + rm -rf gix-packetline-blocking/src # No trailing /. It may be a symlink. if test -e gix-packetline-blocking/src; then fail 'unable to remove target' fi @@ -67,16 +96,17 @@ function generate_all () { } function first_line_ends_crlf () { - # This is tricky to check portably. On Windows in Cygwin-like environments - # including MSYS2 and Git Bash, most text processing tools, including awk, - # sed, and grep, automatically substitute \n for \r\n. Some can be told not - # to, but in non-portable ways that may affect other implementations. Bash - # does so on command substitution and other input, and optionally more often. - # Easy ways to check are often non-portable to other OSes. Fortunately, tools - # that treat input as binary data are exempt (including cat, but "-v" is not - # portable, and it's unreliable in general as lines can end in "^M"). This - # may be doable without od, by using tr more heavily, but it could be hard to - # avoid false positives with unexpected characters, or with \r not before \n. + # This is tricky to check portably. In Cygwin-like environments including + # MSYS2 and Git Bash, most text processing tools, including awk, sed, and + # grep, automatically ignore \r before \n. Some ignore \r everywhere. Some + # can be told to keep \r, but in non-portable ways that may affect other + # implementations. Bash ignores \r in some places even without "-o icncr", + # and ignores \r even more with it, including in all text from command + # substitution. Simple checks may be non-portable to other OSes. Fortunately, + # tools that treat input as binary data are exempt (even cat, but "-v" is + # non-portable, and unreliable in general because lines can end in "^M"). + # This may be doable without od, by using tr more heavily, but it could be + # hard to avoid false positives with unexpected characters or \r without \n. head -n 1 -- "$1" | # Get the longest prefix with no non-trailing \n byte. od -An -ta | # Represent all bytes symbolically, without addresses. From 1125ca0301f449abdcf527d6340a56214094561a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 7 Apr 2024 21:30:32 -0400 Subject: [PATCH 04/19] Properly cover directory names with trailing newlines This doesn't happen on Windows filesystems, but can happen elsewhere. It can happen on occasion by accident. --- etc/copy-packetline.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 16599477d3d..ecde7c25bad 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -17,21 +17,23 @@ function fail () { } function chdir_toplevel() { - local root + local root_padded root - # NOTE: We get the wrong directory name, if the name ends in newline. - root="$(git rev-parse --show-toplevel)" || + # Find the working tree's root. (Padding is for the trailing-newline case.) + root_padded="$(git rev-parse --show-toplevel && echo -n .)" || fail 'git-rev-parse failed to find top-level dir' + root="${root_padded%$'\n.'}" cd -- "$root" } function merging () { - local git_dir + local git_dir_padded git_dir - # NOTE: We get the wrong directory name, if the name ends in newline. - git_dir="$(git rev-parse --git-dir)" || + # Find the .git directory. (Padding is for the trailing-newline case.) + git_dir_padded="$(git rev-parse --git-dir && echo -n .)" || fail 'git-rev-parse failed to find git dir' + git_dir="${git_dir_padded%$'\n.'}" test -e "$git_dir/MERGE_HEAD" } From ee9549724b3d0e7edde25d300267bc156bc1fd4b Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 7 Apr 2024 23:37:34 -0400 Subject: [PATCH 05/19] Simplify script by assuming find supports -print0 `find -print0` is not POSIX, but supported systems include: - GNU/Linux systems, using GNU findutils - Other Linux-based systems using BusyBox find (e.g. Alpine Linux) - BSD systems, including macOS (all versions, since 10) Since this is a Bash script, and even the old versions of Bash on macOS systems support `read -d ''` to read null-byte-delimited input, assuming `find -print0` is supported everywhere needed, this is highly likely to work everywhere needed. It allows filenames to be safely read even if they contain weird characters including newlines, as can happen accidentally (hopefully not intentionally). It would be nicer to loop through the result of a recursive glob (globstar, **), but Bash only supports that starting in version 4, which no version of macOS ships (or is likely to ship). The benefit of this approach, compared to the `find -exec` way used before, is that the code is simpler, no longer needs to call itself as a subprocess through find, and no longer needs a help to clarify a mode that is not meant to be used from the outside. (It may also be slightly faster.) This makes some behavioral changes, in areas where the design had been driven by the implementation rather than the other way around: - We stop on the first failure. - Because of that, there is no need to restate what files could not be generated (it is at most one, and the failure should show it). - As noted above, this script can no longer be invoked to process an individual file (which as not a design goal), and it therefore no longer accepts any command-line arguments (they are ignored). This also fixes a misspelling the big first_line_ends_crlf comment. --- etc/copy-packetline.sh | 89 ++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 60 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index ecde7c25bad..81d8c72378b 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -2,15 +2,6 @@ set -euC -o pipefail -function usage() { - local name - - name="$(basename -- "$0")" - printf '%s [--all] regenerate gix-packetline-blocking source\n' "$name" - printf '%s --file {path} regenerate a single file (avoid; prefer --all)\n' "$name" - printf '%s --help print this message\n' "$name" -} - function fail () { printf '%s: error: %s\n' "$0" "$1" >&2 exit 1 @@ -62,47 +53,12 @@ function check_target () { fi } -function indent () { - sed 's/^/ /' -} - -function generate_all () { - local failures - - chdir_toplevel - - if ! test -d gix-packetline/src; then - fail 'no source directory: gix-packetline/src' - fi - if ! test -d gix-packetline-blocking; then - fail 'no target parent directory: gix-packetline-blocking' - fi - - check_target - - rm -rf gix-packetline-blocking/src # No trailing /. It may be a symlink. - if test -e gix-packetline-blocking/src; then - fail 'unable to remove target' - fi - - failures="$( - find gix-packetline/src/ \ - -exec etc/copy-packetline.sh --file {} \; \ - -o -print - )" - - # If we get here, traversal succeeded, but perhaps some generations failed. - if test -n "$failures"; then - fail $'failed to generate from:\n'"$(indent <<<"$failures")" - fi -} - function first_line_ends_crlf () { # This is tricky to check portably. In Cygwin-like environments including # MSYS2 and Git Bash, most text processing tools, including awk, sed, and # grep, automatically ignore \r before \n. Some ignore \r everywhere. Some # can be told to keep \r, but in non-portable ways that may affect other - # implementations. Bash ignores \r in some places even without "-o icncr", + # implementations. Bash ignores \r in some places even without "-o igncr", # and ignores \r even more with it, including in all text from command # substitution. Simple checks may be non-portable to other OSes. Fortunately, # tools that treat input as binary data are exempt (even cat, but "-v" is @@ -118,6 +74,7 @@ function first_line_ends_crlf () { function make_header () { local source endline + source="$1" endline="$2" @@ -142,14 +99,10 @@ function copy_with_header () { } function generate_one () { - local source shared target + local source target source="$1" - shared="${source#gix-packetline/src/}" - if test "$source" = "$shared"; then - fail "source path seems to be outside gix-packetline/src/: $source" - fi - target="gix-packetline-blocking/src/$shared" + target="gix-packetline-blocking/src/${source#gix-packetline/src/}" if test -d "$source"; then mkdir -p -- "$target" @@ -166,12 +119,28 @@ function generate_one () { fi } -if { test "$#" -eq 1 && test "$1" = '--all'; } || test "$#" -eq 0; then - generate_all -elif test "$#" -eq 2 && test "$1" = '--file'; then - generate_one "$2" -elif test "$#" -eq 1 && test "$1" = '--help'; then - usage -else - fail 'unrecognized syntax, try passing only --help for usage' -fi +function generate_all () { + local source + + chdir_toplevel + + if ! test -d gix-packetline/src; then + fail 'no source directory: gix-packetline/src' + fi + if ! test -d gix-packetline-blocking; then + fail 'no target parent directory: gix-packetline-blocking' + fi + + check_target + + rm -rf gix-packetline-blocking/src # No trailing "/" as it may be a symlink. + if test -e gix-packetline-blocking/src; then + fail 'unable to remove target' + fi + + find gix-packetline/src/ -print0 | while IFS= read -r -d '' source; do + generate_one "$source" + done +} + +generate_all From b312d8d47d30eeda2df1cb95c20be304c026ab8d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 01:04:22 -0400 Subject: [PATCH 06/19] Extract constants; rename functions/variables for clarity This also refactors slightly in other ways, clarifies a comment, and improves an error message. --- etc/copy-packetline.sh | 86 ++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 81d8c72378b..688f16aa4f0 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -2,12 +2,16 @@ set -euC -o pipefail +readonly source_dir='gix-packetline/src' +readonly target_parent_dir='gix-packetline-blocking' +readonly target_dir="$target_parent_dir/src" + function fail () { printf '%s: error: %s\n' "$0" "$1" >&2 exit 1 } -function chdir_toplevel() { +function chdir_toplevel () { local root_padded root # Find the working tree's root. (Padding is for the trailing-newline case.) @@ -29,25 +33,26 @@ function merging () { test -e "$git_dir/MERGE_HEAD" } -function target_status () { - git status --short --ignored=traditional -- gix-packetline-blocking/src || +function target_dir_status () { + git status --short --ignored=traditional -- "$target_dir" || fail 'git-status failed' } -function check_target () { - if ! test -e "gix-packetline-blocking/src"; then +function check_target_dir () { + if ! test -e "$target_dir"; then # The target does not exist on disk, so nothing will be lost. Proceed. return fi if merging; then # In a merge, it would be confusing to replace anything at the target. - if target_status | grep -q '^'; then + if target_dir_status | grep -q '^'; then fail 'target exists, and a merge is in progress' fi else # We can lose data if anything of value at the target is not in the index. - if target_status | grep -q '^.[^ ]'; then + # (Even unstaged deletions, for we can forget what was and wasn't deleted.) + if target_dir_status | grep -q '^.[^ ]'; then fail 'target exists, with unstaged changes or ignored files' fi fi @@ -73,74 +78,73 @@ function first_line_ends_crlf () { } function make_header () { - local source endline + local source_file endline - source="$1" + source_file="$1" endline="$2" # shellcheck disable=SC2016 # The backticks are intentionally literal. printf '//! DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \ - "$source" "$endline" "$endline" + "$source_file" "$endline" "$endline" } function copy_with_header () { - local source target endline + local source_file target_file endline - source="$1" - target="$2" + source_file="$1" + target_file="$2" - if first_line_ends_crlf "$source"; then + if first_line_ends_crlf "$source_file"; then endline=$'\r\n' else endline=$'\n' fi - make_header "$source" "$endline" | cat - -- "$source" >"$target" + make_header "$source_file" "$endline" | + cat - -- "$source_file" >"$target_file" } function generate_one () { - local source target + local source_file target_file - source="$1" - target="gix-packetline-blocking/src/${source#gix-packetline/src/}" + source_file="$1" + target_file="$target_dir/${source_file#"$source_dir"/}" - if test -d "$source"; then - mkdir -p -- "$target" - elif test -L "$source"; then + if test -d "$source_file"; then + mkdir -p -- "$target_file" + elif test -L "$source_file"; then # Cover this case separately, for more useful error messages. - fail "source file is symbolic link: $source" - elif ! test -f "$source"; then + fail "source file is symbolic link: $source_file" + elif ! test -f "$source_file"; then # This covers less common kinds of files we can't or shouldn't process. - fail "source file neither regular file nor directory: $source" - elif [[ "$source" =~ \.rs$ ]]; then - copy_with_header "$source" "$target" + fail "source file neither regular file nor directory: $source_file" + elif [[ "$source_file" =~ \.rs$ ]]; then + copy_with_header "$source_file" "$target_file" else - fail "source file not named as Rust source code: $source" + fail "source file not named as Rust source code: $source_file" fi } function generate_all () { - local source - - chdir_toplevel + local source_file - if ! test -d gix-packetline/src; then - fail 'no source directory: gix-packetline/src' + if ! test -d "$source_dir"; then + fail "no source directory: $source_dir" fi - if ! test -d gix-packetline-blocking; then - fail 'no target parent directory: gix-packetline-blocking' + if ! test -d "$target_parent_dir"; then + fail "no target parent directory: $target_parent_dir" fi + check_target_dir - check_target - - rm -rf gix-packetline-blocking/src # No trailing "/" as it may be a symlink. - if test -e gix-packetline-blocking/src; then - fail 'unable to remove target' + rm -rf -- "$target_dir" # It may be a directory, symlink, or regular file. + if test -e "$target_dir"; then + fail 'unable to remove target location' fi - find gix-packetline/src/ -print0 | while IFS= read -r -d '' source; do - generate_one "$source" + find "$source_dir/" -print0 | while IFS= read -r -d '' source_file; do + generate_one "$source_file" done } +chdir_toplevel generate_all From deeac5effc46ebe74a3faad4b90c16147d0d9b2d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 19:01:38 -0400 Subject: [PATCH 07/19] Add CI checks for copy-packetline For the real purpose of this new check, which is to verify that `just copy-packetline` has been run when needed, to ensure that gix-packetline-blocking/src is not out of date, it should only be necessary to run this check on one platform. So this should probably be changed to `runs-on: ubuntu-latest` instead of a matrix strategy soon. I'm running it on three platforms initially to check the script (though some functionality of the script is not exercised). I've also tested the script locally on Ubuntu 22.04.4 LTS, and on Windows 10 (in Git Bash). --- .github/workflows/ci.yml | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2901d1a930..bcb702ff2e8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -176,7 +176,7 @@ jobs: # Let's not fail CI for this, it will fail locally often enough, and a crate a little bigger # than allows is no problem either if it comes to that. just check-size || true - + cargo-deny: runs-on: ubuntu-latest strategy: @@ -193,6 +193,7 @@ jobs: - uses: EmbarkStudios/cargo-deny-action@v1 with: command: check ${{ matrix.checks }} + wasm: name: WebAssembly runs-on: ubuntu-latest @@ -213,3 +214,34 @@ jobs: name: crates with 'wasm' feature - run: cd gix-pack && cargo build --all-features --target ${{ matrix.target }} name: gix-pack with all features (including wasm) + + check-packetline: + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + continue-on-error: true + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@v1 + with: + toolchain: stable + - uses: extractions/setup-just@v2 + - name: Check that working tree is initially clean + run: | + git status + set -x + git status | grep -qF 'nothing to commit, working tree clean' + - name: Regenerate gix-packetline-blocking/src + run: just copy-packetline + - name: Check that gix-packetline-blocking/src was already up to date + run: | + git status + set -x + git status | grep -qF 'nothing to commit, working tree clean' From 550ac8a62d0efd97b301050ab4bc7a269685eb3f Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 20:24:38 -0400 Subject: [PATCH 08/19] Fix order of -- and - in cat command Somewhat unintuitively, the "-" is considered to be a file operand and not an option, even though it treated to signify standard input rather than a file named "-". On some systems including macOS, options to cat cannot follow paths. On those systems, writing "-" before "--" causes "--" to be treated as a filename, causing the script to break on macOS. This changes the order, which lets the cat command work on all systems, though (as before) if the subsequent argument were a literal "-" then that would not have the desired effect. In this case that is okay, since we actually know that the other argument, the value of the source_file variable, is not exactly "-". (We even know that it does not begin "-". But keeping the "--" before it makes clearer to human readers that this argument is a path and never an option.) --- etc/copy-packetline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 688f16aa4f0..2aea00a225e 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -101,7 +101,7 @@ function copy_with_header () { fi make_header "$source_file" "$endline" | - cat - -- "$source_file" >"$target_file" + cat -- - "$source_file" >"$target_file" } function generate_one () { From c0357d9d80886a364a11e085704d9cd2afaef7c3 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 20:53:51 -0400 Subject: [PATCH 09/19] Invoke script directly on CI instead of via just This speeds things up, as no cargo or just commands have to be run, and no Rust toolchain has to be installed. If part of all of the script is replaced by a tool implemented in Rust, then of course this should (and probably must) be revisited. --- .github/workflows/ci.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcb702ff2e8..ce7a579133a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -229,17 +229,13 @@ jobs: shell: bash steps: - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@v1 - with: - toolchain: stable - - uses: extractions/setup-just@v2 - name: Check that working tree is initially clean run: | git status set -x git status | grep -qF 'nothing to commit, working tree clean' - name: Regenerate gix-packetline-blocking/src - run: just copy-packetline + run: etc/copy-packetline.sh - name: Check that gix-packetline-blocking/src was already up to date run: | git status From 84992d7510410e30bcad27795a8a296ac2a5928a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 21:09:36 -0400 Subject: [PATCH 10/19] Use --porcelain instead of --short for git status Since --porcelain is guaranteed stable across future git versions. --- etc/copy-packetline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 2aea00a225e..79910735a04 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -34,7 +34,7 @@ function merging () { } function target_dir_status () { - git status --short --ignored=traditional -- "$target_dir" || + git status --porcelain --ignored=traditional -- "$target_dir" || fail 'git-status failed' } From 3710c34643cb6a68b9b7c0391d8d2dd6cb539c00 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 21:18:03 -0400 Subject: [PATCH 11/19] Make CI clean working tree checks simpler and more robust --- .github/workflows/ci.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce7a579133a..5da9b23a3cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -230,14 +230,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check that working tree is initially clean - run: | - git status - set -x - git status | grep -qF 'nothing to commit, working tree clean' + run: git status; set -x; git diff --quiet - name: Regenerate gix-packetline-blocking/src run: etc/copy-packetline.sh - name: Check that gix-packetline-blocking/src was already up to date - run: | - git status - set -x - git status | grep -qF 'nothing to commit, working tree clean' + run: git status; set -x; git diff --quiet From ac35cfcf72ddfd3bf5354b3e8a6d1ce5509f65b1 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 21:48:30 -0400 Subject: [PATCH 12/19] Better explain why we cancel even on unstaged deletions Only the secondary reason, which is the less important one, was given before. Now both are given and the more important reason is emphasized. (If the entire subtree is deleted on disk, staged or not, then we do proceed, since neither disadvantage applies. I think this is clear from context, since this is checked first, and like the other checks, it is commented.) --- etc/copy-packetline.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 79910735a04..6f08d9a5e34 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -51,7 +51,10 @@ function check_target_dir () { fi else # We can lose data if anything of value at the target is not in the index. - # (Even unstaged deletions, for we can forget what was and wasn't deleted.) + # (This includes unstaged deletions, for two reasons. One is that we could + # lose track of which files had been deleted. More importantly, replacing a + # staged symlink or regular file with an unstaged directory is shown by + # git-status as only a deletion, even if the directory is non-empty.) if target_dir_status | grep -q '^.[^ ]'; then fail 'target exists, with unstaged changes or ignored files' fi From a793bde062553288bfc045bfda75addb25a7be76 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 21:52:06 -0400 Subject: [PATCH 13/19] Replace gix-packetline-blocking/src with generated files These are copies, except for their headers, and generated by the etc/copy-packetline.sh script. This should make the new CI checks, which use the script, pass. --- gix-packetline-blocking/src | 1 - gix-packetline-blocking/src/decode.rs | 148 +++++++ .../src/encode/async_io.rs | 215 ++++++++++ .../src/encode/blocking_io.rs | 78 ++++ gix-packetline-blocking/src/encode/mod.rs | 29 ++ gix-packetline-blocking/src/lib.rs | 110 +++++ gix-packetline-blocking/src/line/async_io.rs | 49 +++ .../src/line/blocking_io.rs | 46 +++ gix-packetline-blocking/src/line/mod.rs | 90 ++++ gix-packetline-blocking/src/read/async_io.rs | 201 +++++++++ .../src/read/blocking_io.rs | 192 +++++++++ gix-packetline-blocking/src/read/mod.rs | 130 ++++++ .../src/read/sidebands/async_io.rs | 383 ++++++++++++++++++ .../src/read/sidebands/blocking_io.rs | 218 ++++++++++ .../src/read/sidebands/mod.rs | 11 + gix-packetline-blocking/src/write/async_io.rs | 99 +++++ .../src/write/blocking_io.rs | 73 ++++ gix-packetline-blocking/src/write/mod.rs | 23 ++ 18 files changed, 2095 insertions(+), 1 deletion(-) delete mode 120000 gix-packetline-blocking/src create mode 100644 gix-packetline-blocking/src/decode.rs create mode 100644 gix-packetline-blocking/src/encode/async_io.rs create mode 100644 gix-packetline-blocking/src/encode/blocking_io.rs create mode 100644 gix-packetline-blocking/src/encode/mod.rs create mode 100644 gix-packetline-blocking/src/lib.rs create mode 100644 gix-packetline-blocking/src/line/async_io.rs create mode 100644 gix-packetline-blocking/src/line/blocking_io.rs create mode 100644 gix-packetline-blocking/src/line/mod.rs create mode 100644 gix-packetline-blocking/src/read/async_io.rs create mode 100644 gix-packetline-blocking/src/read/blocking_io.rs create mode 100644 gix-packetline-blocking/src/read/mod.rs create mode 100644 gix-packetline-blocking/src/read/sidebands/async_io.rs create mode 100644 gix-packetline-blocking/src/read/sidebands/blocking_io.rs create mode 100644 gix-packetline-blocking/src/read/sidebands/mod.rs create mode 100644 gix-packetline-blocking/src/write/async_io.rs create mode 100644 gix-packetline-blocking/src/write/blocking_io.rs create mode 100644 gix-packetline-blocking/src/write/mod.rs diff --git a/gix-packetline-blocking/src b/gix-packetline-blocking/src deleted file mode 120000 index a92fc2d976d..00000000000 --- a/gix-packetline-blocking/src +++ /dev/null @@ -1 +0,0 @@ -../gix-packetline/src \ No newline at end of file diff --git a/gix-packetline-blocking/src/decode.rs b/gix-packetline-blocking/src/decode.rs new file mode 100644 index 00000000000..c814a46c8cd --- /dev/null +++ b/gix-packetline-blocking/src/decode.rs @@ -0,0 +1,148 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it. + +use bstr::BString; + +use crate::{PacketLineRef, DELIMITER_LINE, FLUSH_LINE, MAX_DATA_LEN, MAX_LINE_LEN, RESPONSE_END_LINE, U16_HEX_BYTES}; + +/// The error used in the [`decode`][mod@crate::decode] module +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Failed to decode the first four hex bytes indicating the line length: {err}")] + HexDecode { err: String }, + #[error("The data received claims to be larger than the maximum allowed size: got {length_in_bytes}, exceeds {MAX_DATA_LEN}")] + DataLengthLimitExceeded { length_in_bytes: usize }, + #[error("Received an invalid empty line")] + DataIsEmpty, + #[error("Received an invalid line of length 3")] + InvalidLineLength, + #[error("{data:?} - consumed {bytes_consumed} bytes")] + Line { data: BString, bytes_consumed: usize }, + #[error("Needing {bytes_needed} additional bytes to decode the line successfully")] + NotEnoughData { bytes_needed: usize }, +} + +/// +#[allow(clippy::empty_docs)] +pub mod band { + /// The error used in [`PacketLineRef::decode_band()`][super::PacketLineRef::decode_band()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("attempt to decode a non-side channel line or input was malformed: {band_id}")] + InvalidSideBand { band_id: u8 }, + #[error("attempt to decode a non-data line into a side-channel band")] + NonDataLine, + } +} + +/// A utility return type to support incremental parsing of packet lines. +#[derive(Debug, Clone)] +pub enum Stream<'a> { + /// Indicate a single packet line was parsed completely + Complete { + /// The parsed packet line + line: PacketLineRef<'a>, + /// The amount of bytes consumed from input + bytes_consumed: usize, + }, + /// A packet line could not yet be parsed due to missing bytes + Incomplete { + /// The amount of additional bytes needed for the parsing to complete + bytes_needed: usize, + }, +} + +/// The result of [`hex_prefix()`] indicating either a special packet line or the amount of wanted bytes +pub enum PacketLineOrWantedSize<'a> { + /// The special kind of packet line decoded from the hex prefix. It never contains actual data. + Line(PacketLineRef<'a>), + /// The amount of bytes indicated by the hex prefix of the packet line. + Wanted(u16), +} + +/// Decode the `four_bytes` packet line prefix provided in hexadecimal form and check it for validity. +pub fn hex_prefix(four_bytes: &[u8]) -> Result, Error> { + debug_assert_eq!(four_bytes.len(), 4, "need four hex bytes"); + for (line_bytes, line_type) in &[ + (FLUSH_LINE, PacketLineRef::Flush), + (DELIMITER_LINE, PacketLineRef::Delimiter), + (RESPONSE_END_LINE, PacketLineRef::ResponseEnd), + ] { + if four_bytes == *line_bytes { + return Ok(PacketLineOrWantedSize::Line(*line_type)); + } + } + + let mut buf = [0u8; U16_HEX_BYTES / 2]; + faster_hex::hex_decode(four_bytes, &mut buf).map_err(|err| Error::HexDecode { err: err.to_string() })?; + let wanted_bytes = u16::from_be_bytes(buf); + + if wanted_bytes == 3 { + return Err(Error::InvalidLineLength); + } + if wanted_bytes == 4 { + return Err(Error::DataIsEmpty); + } + debug_assert!( + wanted_bytes as usize > U16_HEX_BYTES, + "by now there should be more wanted bytes than prefix bytes" + ); + Ok(PacketLineOrWantedSize::Wanted(wanted_bytes - U16_HEX_BYTES as u16)) +} + +/// Obtain a `PacketLine` from `data` after assuring `data` is small enough to fit. +pub fn to_data_line(data: &[u8]) -> Result, Error> { + if data.len() > MAX_LINE_LEN { + return Err(Error::DataLengthLimitExceeded { + length_in_bytes: data.len(), + }); + } + + Ok(PacketLineRef::Data(data)) +} + +/// Decode `data` as packet line while reporting whether the data is complete or not using a [`Stream`]. +pub fn streaming(data: &[u8]) -> Result, Error> { + let data_len = data.len(); + if data_len < U16_HEX_BYTES { + return Ok(Stream::Incomplete { + bytes_needed: U16_HEX_BYTES - data_len, + }); + } + let wanted_bytes = match hex_prefix(&data[..U16_HEX_BYTES])? { + PacketLineOrWantedSize::Wanted(s) => s as usize, + PacketLineOrWantedSize::Line(line) => { + return Ok(Stream::Complete { + line, + bytes_consumed: 4, + }) + } + } + U16_HEX_BYTES; + if wanted_bytes > MAX_LINE_LEN { + return Err(Error::DataLengthLimitExceeded { + length_in_bytes: wanted_bytes, + }); + } + if data_len < wanted_bytes { + return Ok(Stream::Incomplete { + bytes_needed: wanted_bytes - data_len, + }); + } + + Ok(Stream::Complete { + line: to_data_line(&data[U16_HEX_BYTES..wanted_bytes])?, + bytes_consumed: wanted_bytes, + }) +} + +/// Decode an entire packet line from data or fail. +/// +/// Note that failure also happens if there is not enough data to parse a complete packet line, as opposed to [`streaming()`] decoding +/// succeeds in that case, stating how much more bytes are required. +pub fn all_at_once(data: &[u8]) -> Result, Error> { + match streaming(data)? { + Stream::Complete { line, .. } => Ok(line), + Stream::Incomplete { bytes_needed } => Err(Error::NotEnoughData { bytes_needed }), + } +} diff --git a/gix-packetline-blocking/src/encode/async_io.rs b/gix-packetline-blocking/src/encode/async_io.rs new file mode 100644 index 00000000000..45226351553 --- /dev/null +++ b/gix-packetline-blocking/src/encode/async_io.rs @@ -0,0 +1,215 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/async_io.rs. Run `just copy-packetline` to update it. + +use std::{ + io, + pin::Pin, + task::{Context, Poll}, +}; + +use futures_io::AsyncWrite; +use futures_lite::AsyncWriteExt; + +use super::u16_to_hex; +use crate::{encode::Error, Channel, DELIMITER_LINE, ERR_PREFIX, FLUSH_LINE, MAX_DATA_LEN, RESPONSE_END_LINE}; + +pin_project_lite::pin_project! { + /// A way of writing packet lines asynchronously. + pub struct LineWriter<'a, W> { + #[pin] + pub(crate) writer: W, + pub(crate) prefix: &'a [u8], + pub(crate) suffix: &'a [u8], + state: State<'a>, + } +} + +enum State<'a> { + Idle, + WriteHexLen([u8; 4], usize), + WritePrefix(&'a [u8]), + WriteData(usize), + WriteSuffix(&'a [u8]), +} + +impl<'a, W: AsyncWrite + Unpin> LineWriter<'a, W> { + /// Create a new line writer writing data with a `prefix` and `suffix`. + /// + /// Keep the additional `prefix` or `suffix` buffers empty if no prefix or suffix should be written. + pub fn new(writer: W, prefix: &'a [u8], suffix: &'a [u8]) -> Self { + LineWriter { + writer, + prefix, + suffix, + state: State::Idle, + } + } + + /// Consume self and reveal the inner writer. + pub fn into_inner(self) -> W { + self.writer + } +} + +fn into_io_err(err: Error) -> io::Error { + io::Error::new(io::ErrorKind::Other, err) +} + +impl AsyncWrite for LineWriter<'_, W> { + fn poll_write(self: Pin<&mut Self>, cx: &mut Context<'_>, data: &[u8]) -> Poll> { + use futures_lite::ready; + let mut this = self.project(); + loop { + match &mut this.state { + State::Idle => { + let data_len = this.prefix.len() + data.len() + this.suffix.len(); + if data_len > MAX_DATA_LEN { + return Poll::Ready(Err(into_io_err(Error::DataLengthLimitExceeded { + length_in_bytes: data_len, + }))); + } + if data.is_empty() { + return Poll::Ready(Err(into_io_err(Error::DataIsEmpty))); + } + let data_len = data_len + 4; + let len_buf = u16_to_hex(data_len as u16); + *this.state = State::WriteHexLen(len_buf, 0) + } + State::WriteHexLen(hex_len, written) => { + while *written != hex_len.len() { + let n = ready!(this.writer.as_mut().poll_write(cx, &hex_len[*written..]))?; + if n == 0 { + return Poll::Ready(Err(io::ErrorKind::WriteZero.into())); + } + *written += n; + } + if this.prefix.is_empty() { + *this.state = State::WriteData(0) + } else { + *this.state = State::WritePrefix(this.prefix) + } + } + State::WritePrefix(buf) => { + while !buf.is_empty() { + let n = ready!(this.writer.as_mut().poll_write(cx, buf))?; + if n == 0 { + return Poll::Ready(Err(io::ErrorKind::WriteZero.into())); + } + let (_, rest) = std::mem::take(buf).split_at(n); + *buf = rest; + } + *this.state = State::WriteData(0) + } + State::WriteData(written) => { + while *written != data.len() { + let n = ready!(this.writer.as_mut().poll_write(cx, &data[*written..]))?; + if n == 0 { + return Poll::Ready(Err(io::ErrorKind::WriteZero.into())); + } + *written += n; + } + if this.suffix.is_empty() { + let written = 4 + this.prefix.len() + *written; + *this.state = State::Idle; + return Poll::Ready(Ok(written)); + } else { + *this.state = State::WriteSuffix(this.suffix) + } + } + State::WriteSuffix(buf) => { + while !buf.is_empty() { + let n = ready!(this.writer.as_mut().poll_write(cx, buf))?; + if n == 0 { + return Poll::Ready(Err(io::ErrorKind::WriteZero.into())); + } + let (_, rest) = std::mem::take(buf).split_at(n); + *buf = rest; + } + *this.state = State::Idle; + return Poll::Ready(Ok(4 + this.prefix.len() + data.len() + this.suffix.len())); + } + } + } + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + this.writer.poll_flush(cx) + } + + fn poll_close(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + this.writer.poll_close(cx) + } +} + +async fn prefixed_and_suffixed_data_to_write( + prefix: &[u8], + data: &[u8], + suffix: &[u8], + mut out: impl AsyncWrite + Unpin, +) -> io::Result { + let data_len = prefix.len() + data.len() + suffix.len(); + if data_len > MAX_DATA_LEN { + return Err(into_io_err(Error::DataLengthLimitExceeded { + length_in_bytes: data_len, + })); + } + if data.is_empty() { + return Err(into_io_err(Error::DataIsEmpty)); + } + + let data_len = data_len + 4; + let buf = u16_to_hex(data_len as u16); + + out.write_all(&buf).await?; + if !prefix.is_empty() { + out.write_all(prefix).await?; + } + out.write_all(data).await?; + if !suffix.is_empty() { + out.write_all(suffix).await?; + } + Ok(data_len) +} + +async fn prefixed_data_to_write(prefix: &[u8], data: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { + prefixed_and_suffixed_data_to_write(prefix, data, &[], out).await +} + +/// Write a `text` message to `out`, which is assured to end in a newline. +pub async fn text_to_write(text: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { + prefixed_and_suffixed_data_to_write(&[], text, &[b'\n'], out).await +} + +/// Write a `data` message to `out`. +pub async fn data_to_write(data: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { + prefixed_data_to_write(&[], data, out).await +} + +/// Write an error `message` to `out`. +pub async fn error_to_write(message: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { + prefixed_data_to_write(ERR_PREFIX, message, out).await +} + +/// Write a response-end message to `out`. +pub async fn response_end_to_write(mut out: impl AsyncWrite + Unpin) -> io::Result { + out.write_all(RESPONSE_END_LINE).await?; + Ok(4) +} + +/// Write a delim message to `out`. +pub async fn delim_to_write(mut out: impl AsyncWrite + Unpin) -> io::Result { + out.write_all(DELIMITER_LINE).await?; + Ok(4) +} + +/// Write a flush message to `out`. +pub async fn flush_to_write(mut out: impl AsyncWrite + Unpin) -> io::Result { + out.write_all(FLUSH_LINE).await?; + Ok(4) +} + +/// Write `data` of `kind` to `out` using side-band encoding. +pub async fn band_to_write(kind: Channel, data: &[u8], out: impl AsyncWrite + Unpin) -> io::Result { + prefixed_data_to_write(&[kind as u8], data, out).await +} diff --git a/gix-packetline-blocking/src/encode/blocking_io.rs b/gix-packetline-blocking/src/encode/blocking_io.rs new file mode 100644 index 00000000000..244a14650cc --- /dev/null +++ b/gix-packetline-blocking/src/encode/blocking_io.rs @@ -0,0 +1,78 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/blocking_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use super::u16_to_hex; +use crate::{encode::Error, Channel, DELIMITER_LINE, ERR_PREFIX, FLUSH_LINE, MAX_DATA_LEN, RESPONSE_END_LINE}; + +/// Write a response-end message to `out`. +pub fn response_end_to_write(mut out: impl io::Write) -> io::Result { + out.write_all(RESPONSE_END_LINE).map(|_| 4) +} + +/// Write a delim message to `out`. +pub fn delim_to_write(mut out: impl io::Write) -> io::Result { + out.write_all(DELIMITER_LINE).map(|_| 4) +} + +/// Write a flush message to `out`. +pub fn flush_to_write(mut out: impl io::Write) -> io::Result { + out.write_all(FLUSH_LINE).map(|_| 4) +} + +/// Write an error `message` to `out`. +pub fn error_to_write(message: &[u8], out: impl io::Write) -> io::Result { + prefixed_data_to_write(ERR_PREFIX, message, out) +} + +/// Write `data` of `kind` to `out` using side-band encoding. +pub fn band_to_write(kind: Channel, data: &[u8], out: impl io::Write) -> io::Result { + prefixed_data_to_write(&[kind as u8], data, out) +} + +/// Write a `data` message to `out`. +pub fn data_to_write(data: &[u8], out: impl io::Write) -> io::Result { + prefixed_data_to_write(&[], data, out) +} + +/// Write a `text` message to `out`, which is assured to end in a newline. +pub fn text_to_write(text: &[u8], out: impl io::Write) -> io::Result { + prefixed_and_suffixed_data_to_write(&[], text, &[b'\n'], out) +} + +fn prefixed_data_to_write(prefix: &[u8], data: &[u8], out: impl io::Write) -> io::Result { + prefixed_and_suffixed_data_to_write(prefix, data, &[], out) +} + +fn prefixed_and_suffixed_data_to_write( + prefix: &[u8], + data: &[u8], + suffix: &[u8], + mut out: impl io::Write, +) -> io::Result { + let data_len = prefix.len() + data.len() + suffix.len(); + if data_len > MAX_DATA_LEN { + return Err(io::Error::new( + io::ErrorKind::Other, + Error::DataLengthLimitExceeded { + length_in_bytes: data_len, + }, + )); + } + if data.is_empty() { + return Err(io::Error::new(io::ErrorKind::Other, Error::DataIsEmpty)); + } + + let data_len = data_len + 4; + let buf = u16_to_hex(data_len as u16); + + out.write_all(&buf)?; + if !prefix.is_empty() { + out.write_all(prefix)?; + } + out.write_all(data)?; + if !suffix.is_empty() { + out.write_all(suffix)?; + } + Ok(data_len) +} diff --git a/gix-packetline-blocking/src/encode/mod.rs b/gix-packetline-blocking/src/encode/mod.rs new file mode 100644 index 00000000000..56da585a018 --- /dev/null +++ b/gix-packetline-blocking/src/encode/mod.rs @@ -0,0 +1,29 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/mod.rs. Run `just copy-packetline` to update it. + +use crate::MAX_DATA_LEN; + +/// The error returned by most functions in the [`encode`][crate::encode] module +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Cannot encode more than {MAX_DATA_LEN} bytes, got {length_in_bytes}")] + DataLengthLimitExceeded { length_in_bytes: usize }, + #[error("Empty lines are invalid")] + DataIsEmpty, +} + +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +mod async_io; +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +pub use async_io::*; + +#[cfg(feature = "blocking-io")] +mod blocking_io; +#[cfg(feature = "blocking-io")] +pub use blocking_io::*; + +pub(crate) fn u16_to_hex(value: u16) -> [u8; 4] { + let mut buf = [0u8; 4]; + faster_hex::hex_encode(&value.to_be_bytes(), &mut buf).expect("two bytes to 4 hex chars never fails"); + buf +} diff --git a/gix-packetline-blocking/src/lib.rs b/gix-packetline-blocking/src/lib.rs new file mode 100644 index 00000000000..51736330ddd --- /dev/null +++ b/gix-packetline-blocking/src/lib.rs @@ -0,0 +1,110 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/lib.rs. Run `just copy-packetline` to update it. + +//! Read and write the git packet line wire format without copying it. +//! +//! For reading the packet line format use the [`StreamingPeekableIter`], and for writing the [`Writer`]. +//! ## Feature Flags +#![cfg_attr( + all(doc, all(doc, feature = "document-features")), + doc = ::document_features::document_features!() +)] +#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +const U16_HEX_BYTES: usize = 4; +const MAX_DATA_LEN: usize = 65516; +const MAX_LINE_LEN: usize = MAX_DATA_LEN + U16_HEX_BYTES; +const FLUSH_LINE: &[u8] = b"0000"; +const DELIMITER_LINE: &[u8] = b"0001"; +const RESPONSE_END_LINE: &[u8] = b"0002"; +const ERR_PREFIX: &[u8] = b"ERR "; + +/// One of three side-band types allowing to multiplex information over a single connection. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Channel { + /// The usable data itself in any format. + Data = 1, + /// Progress information in a user-readable format. + Progress = 2, + /// Error information in a user readable format. Receiving it usually terminates the connection. + Error = 3, +} + +mod line; +/// +#[allow(clippy::empty_docs)] +pub mod read; + +/// +#[allow(clippy::empty_docs)] +#[cfg(any(feature = "async-io", feature = "blocking-io"))] +mod write; +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +pub use write::async_io::Writer; +#[cfg(feature = "blocking-io")] +pub use write::blocking_io::Writer; + +/// A borrowed packet line as it refers to a slice of data by reference. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum PacketLineRef<'a> { + /// A chunk of raw data. + Data(&'a [u8]), + /// A flush packet. + Flush, + /// A delimiter packet. + Delimiter, + /// The end of the response. + ResponseEnd, +} + +/// A packet line representing an Error in a side-band channel. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ErrorRef<'a>(pub &'a [u8]); + +/// A packet line representing text, which may include a trailing newline. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct TextRef<'a>(pub &'a [u8]); + +/// A band in a side-band channel. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum BandRef<'a> { + /// A band carrying data. + Data(&'a [u8]), + /// A band carrying user readable progress information. + Progress(&'a [u8]), + /// A band carrying user readable errors. + Error(&'a [u8]), +} + +/// Read pack lines one after another, without consuming more than needed from the underlying +/// [`Read`][std::io::Read]. [`Flush`][PacketLineRef::Flush] lines cause the reader to stop producing lines forever, +/// leaving [`Read`][std::io::Read] at the start of whatever comes next. +/// +/// This implementation tries hard not to allocate at all which leads to quite some added complexity and plenty of extra memory copies. +pub struct StreamingPeekableIter { + read: T, + peek_buf: Vec, + #[cfg(any(feature = "blocking-io", feature = "async-io"))] + buf: Vec, + fail_on_err_lines: bool, + delimiters: &'static [PacketLineRef<'static>], + is_done: bool, + stopped_at: Option>, + #[cfg_attr(all(not(feature = "async-io"), not(feature = "blocking-io")), allow(dead_code))] + trace: bool, +} + +/// Utilities to help decoding packet lines +pub mod decode; +#[doc(inline)] +pub use decode::all_at_once as decode; +/// Utilities to encode different kinds of packet lines +pub mod encode; + +#[cfg(all(feature = "async-io", feature = "blocking-io"))] +compile_error!("Cannot set both 'blocking-io' and 'async-io' features as they are mutually exclusive"); diff --git a/gix-packetline-blocking/src/line/async_io.rs b/gix-packetline-blocking/src/line/async_io.rs new file mode 100644 index 00000000000..65a5756d4d7 --- /dev/null +++ b/gix-packetline-blocking/src/line/async_io.rs @@ -0,0 +1,49 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/line/async_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use futures_io::AsyncWrite; + +use crate::{encode, BandRef, Channel, ErrorRef, PacketLineRef, TextRef}; + +impl<'a> BandRef<'a> { + /// Serialize this instance to `out`, returning the amount of bytes written. + /// + /// The data written to `out` can be decoded with [`Borrowed::decode_band()]`. + pub async fn write_to(&self, out: impl AsyncWrite + Unpin) -> io::Result { + match self { + BandRef::Data(d) => encode::band_to_write(Channel::Data, d, out), + BandRef::Progress(d) => encode::band_to_write(Channel::Progress, d, out), + BandRef::Error(d) => encode::band_to_write(Channel::Error, d, out), + } + .await + } +} + +impl<'a> TextRef<'a> { + /// Serialize this instance to `out`, appending a newline if there is none, returning the amount of bytes written. + pub async fn write_to(&self, out: impl AsyncWrite + Unpin) -> io::Result { + encode::text_to_write(self.0, out).await + } +} + +impl<'a> ErrorRef<'a> { + /// Serialize this line as error to `out`. + /// + /// This includes a marker to allow decoding it outside of a side-band channel, returning the amount of bytes written. + pub async fn write_to(&self, out: impl AsyncWrite + Unpin) -> io::Result { + encode::error_to_write(self.0, out).await + } +} + +impl<'a> PacketLineRef<'a> { + /// Serialize this instance to `out` in git `packetline` format, returning the amount of bytes written to `out`. + pub async fn write_to(&self, out: impl AsyncWrite + Unpin) -> io::Result { + match self { + PacketLineRef::Data(d) => encode::data_to_write(d, out).await, + PacketLineRef::Flush => encode::flush_to_write(out).await, + PacketLineRef::Delimiter => encode::delim_to_write(out).await, + PacketLineRef::ResponseEnd => encode::response_end_to_write(out).await, + } + } +} diff --git a/gix-packetline-blocking/src/line/blocking_io.rs b/gix-packetline-blocking/src/line/blocking_io.rs new file mode 100644 index 00000000000..5d698c1536c --- /dev/null +++ b/gix-packetline-blocking/src/line/blocking_io.rs @@ -0,0 +1,46 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/line/blocking_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use crate::{encode, BandRef, Channel, ErrorRef, PacketLineRef, TextRef}; + +impl<'a> BandRef<'a> { + /// Serialize this instance to `out`, returning the amount of bytes written. + /// + /// The data written to `out` can be decoded with [`Borrowed::decode_band()]`. + pub fn write_to(&self, out: impl io::Write) -> io::Result { + match self { + BandRef::Data(d) => encode::band_to_write(Channel::Data, d, out), + BandRef::Progress(d) => encode::band_to_write(Channel::Progress, d, out), + BandRef::Error(d) => encode::band_to_write(Channel::Error, d, out), + } + } +} + +impl<'a> TextRef<'a> { + /// Serialize this instance to `out`, appending a newline if there is none, returning the amount of bytes written. + pub fn write_to(&self, out: impl io::Write) -> io::Result { + encode::text_to_write(self.0, out) + } +} + +impl<'a> ErrorRef<'a> { + /// Serialize this line as error to `out`. + /// + /// This includes a marker to allow decoding it outside of a side-band channel, returning the amount of bytes written. + pub fn write_to(&self, out: impl io::Write) -> io::Result { + encode::error_to_write(self.0, out) + } +} + +impl<'a> PacketLineRef<'a> { + /// Serialize this instance to `out` in git `packetline` format, returning the amount of bytes written to `out`. + pub fn write_to(&self, out: impl io::Write) -> io::Result { + match self { + PacketLineRef::Data(d) => encode::data_to_write(d, out), + PacketLineRef::Flush => encode::flush_to_write(out), + PacketLineRef::Delimiter => encode::delim_to_write(out), + PacketLineRef::ResponseEnd => encode::response_end_to_write(out), + } + } +} diff --git a/gix-packetline-blocking/src/line/mod.rs b/gix-packetline-blocking/src/line/mod.rs new file mode 100644 index 00000000000..9e27f36e4de --- /dev/null +++ b/gix-packetline-blocking/src/line/mod.rs @@ -0,0 +1,90 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/line/mod.rs. Run `just copy-packetline` to update it. + +use bstr::BStr; + +use crate::{decode, BandRef, Channel, ErrorRef, PacketLineRef, TextRef, ERR_PREFIX}; + +impl<'a> PacketLineRef<'a> { + /// Return this instance as slice if it's [`Data`][PacketLineRef::Data]. + pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + PacketLineRef::Data(d) => Some(d), + PacketLineRef::Flush | PacketLineRef::Delimiter | PacketLineRef::ResponseEnd => None, + } + } + /// Return this instance's [`as_slice()`][PacketLineRef::as_slice()] as [`BStr`]. + pub fn as_bstr(&self) -> Option<&'a BStr> { + self.as_slice().map(Into::into) + } + /// Interpret this instance's [`as_slice()`][PacketLineRef::as_slice()] as [`ErrorRef`]. + /// + /// This works for any data received in an error [channel][crate::Channel]. + /// + /// Note that this creates an unchecked error using the slice verbatim, which is useful to [serialize it][ErrorRef::write_to()]. + /// See [`check_error()`][PacketLineRef::check_error()] for a version that assures the error information is in the expected format. + pub fn as_error(&self) -> Option> { + self.as_slice().map(ErrorRef) + } + /// Check this instance's [`as_slice()`][PacketLineRef::as_slice()] is a valid [`ErrorRef`] and return it. + /// + /// This works for any data received in an error [channel][crate::Channel]. + pub fn check_error(&self) -> Option> { + self.as_slice().and_then(|data| { + if data.len() >= ERR_PREFIX.len() && &data[..ERR_PREFIX.len()] == ERR_PREFIX { + Some(ErrorRef(&data[ERR_PREFIX.len()..])) + } else { + None + } + }) + } + /// Return this instance as text, with the trailing newline truncated if present. + pub fn as_text(&self) -> Option> { + self.as_slice().map(Into::into) + } + + /// Interpret the data in this [`slice`][PacketLineRef::as_slice()] as [`BandRef`] according to the given `kind` of channel. + /// + /// Note that this is only relevant in a side-band channel. + /// See [`decode_band()`][PacketLineRef::decode_band()] in case `kind` is unknown. + pub fn as_band(&self, kind: Channel) -> Option> { + self.as_slice().map(|d| match kind { + Channel::Data => BandRef::Data(d), + Channel::Progress => BandRef::Progress(d), + Channel::Error => BandRef::Error(d), + }) + } + + /// Decode the band of this [`slice`][PacketLineRef::as_slice()] + pub fn decode_band(&self) -> Result, decode::band::Error> { + let d = self.as_slice().ok_or(decode::band::Error::NonDataLine)?; + Ok(match d[0] { + 1 => BandRef::Data(&d[1..]), + 2 => BandRef::Progress(&d[1..]), + 3 => BandRef::Error(&d[1..]), + band => return Err(decode::band::Error::InvalidSideBand { band_id: band }), + }) + } +} + +impl<'a> From<&'a [u8]> for TextRef<'a> { + fn from(d: &'a [u8]) -> Self { + let d = if d[d.len() - 1] == b'\n' { &d[..d.len() - 1] } else { d }; + TextRef(d) + } +} + +impl<'a> TextRef<'a> { + /// Return this instance's data. + pub fn as_slice(&self) -> &'a [u8] { + self.0 + } + /// Return this instance's data as [`BStr`]. + pub fn as_bstr(&self) -> &'a BStr { + self.0.into() + } +} + +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +mod async_io; +#[cfg(feature = "blocking-io")] +mod blocking_io; diff --git a/gix-packetline-blocking/src/read/async_io.rs b/gix-packetline-blocking/src/read/async_io.rs new file mode 100644 index 00000000000..d35b2a5b08f --- /dev/null +++ b/gix-packetline-blocking/src/read/async_io.rs @@ -0,0 +1,201 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/async_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use bstr::ByteSlice; +use futures_io::AsyncRead; +use futures_lite::AsyncReadExt; + +use crate::{ + decode, + read::{ExhaustiveOutcome, ProgressAction, WithSidebands}, + PacketLineRef, StreamingPeekableIter, MAX_LINE_LEN, U16_HEX_BYTES, +}; + +/// Non-IO methods +impl StreamingPeekableIter +where + T: AsyncRead + Unpin, +{ + #[allow(clippy::needless_lifetimes)] // TODO: remove once this is clippy false positive is fixed + async fn read_line_inner<'a>( + reader: &mut T, + buf: &'a mut [u8], + ) -> io::Result, decode::Error>> { + let (hex_bytes, data_bytes) = buf.split_at_mut(4); + reader.read_exact(hex_bytes).await?; + let num_data_bytes = match decode::hex_prefix(hex_bytes) { + Ok(decode::PacketLineOrWantedSize::Line(line)) => return Ok(Ok(line)), + Ok(decode::PacketLineOrWantedSize::Wanted(additional_bytes)) => additional_bytes as usize, + Err(err) => return Ok(Err(err)), + }; + + let (data_bytes, _) = data_bytes.split_at_mut(num_data_bytes); + reader.read_exact(data_bytes).await?; + match decode::to_data_line(data_bytes) { + Ok(line) => Ok(Ok(line)), + Err(err) => Ok(Err(err)), + } + } + + /// This function is needed to help the borrow checker allow us to return references all the time + /// It contains a bunch of logic shared between peek and `read_line` invocations. + async fn read_line_inner_exhaustive<'a>( + reader: &mut T, + buf: &'a mut Vec, + delimiters: &[PacketLineRef<'static>], + fail_on_err_lines: bool, + buf_resize: bool, + trace: bool, + ) -> ExhaustiveOutcome<'a> { + ( + false, + None, + Some(match Self::read_line_inner(reader, buf).await { + Ok(Ok(line)) => { + if trace { + match line { + #[allow(unused_variables)] + PacketLineRef::Data(d) => { + gix_trace::trace!("<< {}", d.as_bstr().trim().as_bstr()); + } + PacketLineRef::Flush => { + gix_trace::trace!("<< FLUSH"); + } + PacketLineRef::Delimiter => { + gix_trace::trace!("<< DELIM"); + } + PacketLineRef::ResponseEnd => { + gix_trace::trace!("<< RESPONSE_END"); + } + } + } + if delimiters.contains(&line) { + let stopped_at = delimiters.iter().find(|l| **l == line).copied(); + buf.clear(); + return (true, stopped_at, None); + } else if fail_on_err_lines { + if let Some(err) = line.check_error() { + let err = err.0.as_bstr().to_owned(); + buf.clear(); + return ( + true, + None, + Some(Err(io::Error::new( + io::ErrorKind::Other, + crate::read::Error { message: err }, + ))), + ); + } + } + let len = line.as_slice().map_or(U16_HEX_BYTES, |s| s.len() + U16_HEX_BYTES); + if buf_resize { + buf.resize(len, 0); + } + Ok(Ok(crate::decode(buf).expect("only valid data here"))) + } + Ok(Err(err)) => { + buf.clear(); + Ok(Err(err)) + } + Err(err) => { + buf.clear(); + Err(err) + } + }), + ) + } + + /// Read a packet line into the internal buffer and return it. + /// + /// Returns `None` if the end of iteration is reached because of one of the following: + /// + /// * natural EOF + /// * ERR packet line encountered if [`fail_on_err_lines()`][StreamingPeekableIter::fail_on_err_lines()] is true. + /// * A `delimiter` packet line encountered + pub async fn read_line(&mut self) -> Option, decode::Error>>> { + if self.is_done { + return None; + } + if !self.peek_buf.is_empty() { + std::mem::swap(&mut self.peek_buf, &mut self.buf); + self.peek_buf.clear(); + Some(Ok(Ok(crate::decode(&self.buf).expect("only valid data in peek buf")))) + } else { + if self.buf.len() != MAX_LINE_LEN { + self.buf.resize(MAX_LINE_LEN, 0); + } + let (is_done, stopped_at, res) = Self::read_line_inner_exhaustive( + &mut self.read, + &mut self.buf, + self.delimiters, + self.fail_on_err_lines, + false, + self.trace, + ) + .await; + self.is_done = is_done; + self.stopped_at = stopped_at; + res + } + } + + /// Peek the next packet line without consuming it. Returns `None` if a stop-packet or an error + /// was encountered. + /// + /// Multiple calls to peek will return the same packet line, if there is one. + pub async fn peek_line(&mut self) -> Option, decode::Error>>> { + if self.is_done { + return None; + } + if self.peek_buf.is_empty() { + self.peek_buf.resize(MAX_LINE_LEN, 0); + let (is_done, stopped_at, res) = Self::read_line_inner_exhaustive( + &mut self.read, + &mut self.peek_buf, + self.delimiters, + self.fail_on_err_lines, + true, + self.trace, + ) + .await; + self.is_done = is_done; + self.stopped_at = stopped_at; + res + } else { + Some(Ok(Ok(crate::decode(&self.peek_buf).expect("only valid data here")))) + } + } + + /// Same as [`as_read_with_sidebands(…)`][StreamingPeekableIter::as_read_with_sidebands()], but for channels without side band support. + /// + /// Due to the preconfigured function type this method can be called without 'turbofish'. + #[allow(clippy::type_complexity)] + pub fn as_read(&mut self) -> WithSidebands<'_, T, fn(bool, &[u8]) -> ProgressAction> { + WithSidebands::new(self) + } + + /// Return this instance as implementor of [`Read`][io::Read] assuming side bands to be used in all received packet lines. + /// Each invocation of [`read_line()`][io::BufRead::read_line()] returns a packet line. + /// + /// Progress or error information will be passed to the given `handle_progress(is_error, text)` function, with `is_error: bool` + /// being true in case the `text` is to be interpreted as error. + /// + /// _Please note_ that side bands need to be negotiated with the server. + pub fn as_read_with_sidebands ProgressAction + Unpin>( + &mut self, + handle_progress: F, + ) -> WithSidebands<'_, T, F> { + WithSidebands::with_progress_handler(self, handle_progress) + } + + /// Same as [`as_read_with_sidebands(…)`][StreamingPeekableIter::as_read_with_sidebands()], but for channels without side band support. + /// + /// The type parameter `F` needs to be configured for this method to be callable using the 'turbofish' operator. + /// Use [`as_read()`][StreamingPeekableIter::as_read()]. + pub fn as_read_without_sidebands ProgressAction + Unpin>( + &mut self, + ) -> WithSidebands<'_, T, F> { + WithSidebands::without_progress_handler(self) + } +} diff --git a/gix-packetline-blocking/src/read/blocking_io.rs b/gix-packetline-blocking/src/read/blocking_io.rs new file mode 100644 index 00000000000..7857d2feb88 --- /dev/null +++ b/gix-packetline-blocking/src/read/blocking_io.rs @@ -0,0 +1,192 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/blocking_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use bstr::ByteSlice; + +use crate::{ + decode, + read::{ExhaustiveOutcome, ProgressAction, WithSidebands}, + PacketLineRef, StreamingPeekableIter, MAX_LINE_LEN, U16_HEX_BYTES, +}; + +/// Non-IO methods +impl StreamingPeekableIter +where + T: io::Read, +{ + fn read_line_inner<'a>(reader: &mut T, buf: &'a mut [u8]) -> io::Result, decode::Error>> { + let (hex_bytes, data_bytes) = buf.split_at_mut(4); + reader.read_exact(hex_bytes)?; + let num_data_bytes = match decode::hex_prefix(hex_bytes) { + Ok(decode::PacketLineOrWantedSize::Line(line)) => return Ok(Ok(line)), + Ok(decode::PacketLineOrWantedSize::Wanted(additional_bytes)) => additional_bytes as usize, + Err(err) => return Ok(Err(err)), + }; + + let (data_bytes, _) = data_bytes.split_at_mut(num_data_bytes); + reader.read_exact(data_bytes)?; + match decode::to_data_line(data_bytes) { + Ok(line) => Ok(Ok(line)), + Err(err) => Ok(Err(err)), + } + } + + /// This function is needed to help the borrow checker allow us to return references all the time + /// It contains a bunch of logic shared between peek and `read_line` invocations. + fn read_line_inner_exhaustive<'a>( + reader: &mut T, + buf: &'a mut Vec, + delimiters: &[PacketLineRef<'static>], + fail_on_err_lines: bool, + buf_resize: bool, + trace: bool, + ) -> ExhaustiveOutcome<'a> { + ( + false, + None, + Some(match Self::read_line_inner(reader, buf) { + Ok(Ok(line)) => { + if trace { + match line { + #[allow(unused_variables)] + PacketLineRef::Data(d) => { + gix_trace::trace!("<< {}", d.as_bstr().trim().as_bstr()); + } + PacketLineRef::Flush => { + gix_trace::trace!("<< FLUSH"); + } + PacketLineRef::Delimiter => { + gix_trace::trace!("<< DELIM"); + } + PacketLineRef::ResponseEnd => { + gix_trace::trace!("<< RESPONSE_END"); + } + } + } + if delimiters.contains(&line) { + let stopped_at = delimiters.iter().find(|l| **l == line).copied(); + buf.clear(); + return (true, stopped_at, None); + } else if fail_on_err_lines { + if let Some(err) = line.check_error() { + let err = err.0.as_bstr().to_owned(); + buf.clear(); + return ( + true, + None, + Some(Err(io::Error::new( + io::ErrorKind::Other, + crate::read::Error { message: err }, + ))), + ); + } + } + let len = line.as_slice().map_or(U16_HEX_BYTES, |s| s.len() + U16_HEX_BYTES); + if buf_resize { + buf.resize(len, 0); + } + // TODO(borrowchk): remove additional decoding of internal buffer which is needed only to make it past borrowchk + Ok(Ok(crate::decode(buf).expect("only valid data here"))) + } + Ok(Err(err)) => { + buf.clear(); + Ok(Err(err)) + } + Err(err) => { + buf.clear(); + Err(err) + } + }), + ) + } + + /// Read a packet line into the internal buffer and return it. + /// + /// Returns `None` if the end of iteration is reached because of one of the following: + /// + /// * natural EOF + /// * ERR packet line encountered if [`fail_on_err_lines()`][StreamingPeekableIter::fail_on_err_lines()] is true. + /// * A `delimiter` packet line encountered + pub fn read_line(&mut self) -> Option, decode::Error>>> { + if self.is_done { + return None; + } + if !self.peek_buf.is_empty() { + std::mem::swap(&mut self.peek_buf, &mut self.buf); + self.peek_buf.clear(); + Some(Ok(Ok(crate::decode(&self.buf).expect("only valid data in peek buf")))) + } else { + if self.buf.len() != MAX_LINE_LEN { + self.buf.resize(MAX_LINE_LEN, 0); + } + let (is_done, stopped_at, res) = Self::read_line_inner_exhaustive( + &mut self.read, + &mut self.buf, + self.delimiters, + self.fail_on_err_lines, + false, + self.trace, + ); + self.is_done = is_done; + self.stopped_at = stopped_at; + res + } + } + + /// Peek the next packet line without consuming it. Returns `None` if a stop-packet or an error + /// was encountered. + /// + /// Multiple calls to peek will return the same packet line, if there is one. + pub fn peek_line(&mut self) -> Option, decode::Error>>> { + if self.is_done { + return None; + } + if self.peek_buf.is_empty() { + self.peek_buf.resize(MAX_LINE_LEN, 0); + let (is_done, stopped_at, res) = Self::read_line_inner_exhaustive( + &mut self.read, + &mut self.peek_buf, + self.delimiters, + self.fail_on_err_lines, + true, + self.trace, + ); + self.is_done = is_done; + self.stopped_at = stopped_at; + res + } else { + Some(Ok(Ok(crate::decode(&self.peek_buf).expect("only valid data here")))) + } + } + + /// Return this instance as implementor of [`Read`][io::Read] assuming side bands to be used in all received packet lines. + /// Each invocation of [`read_line()`][io::BufRead::read_line()] returns a packet line. + /// + /// Progress or error information will be passed to the given `handle_progress(is_error, text)` function, with `is_error: bool` + /// being true in case the `text` is to be interpreted as error. + /// + /// _Please note_ that side bands need to be negotiated with the server. + pub fn as_read_with_sidebands ProgressAction>( + &mut self, + handle_progress: F, + ) -> WithSidebands<'_, T, F> { + WithSidebands::with_progress_handler(self, handle_progress) + } + + /// Same as [`as_read_with_sidebands(…)`][StreamingPeekableIter::as_read_with_sidebands()], but for channels without side band support. + /// + /// The type parameter `F` needs to be configured for this method to be callable using the 'turbofish' operator. + /// Use [`as_read()`][StreamingPeekableIter::as_read()]. + pub fn as_read_without_sidebands ProgressAction>(&mut self) -> WithSidebands<'_, T, F> { + WithSidebands::without_progress_handler(self) + } + + /// Same as [`as_read_with_sidebands(…)`][StreamingPeekableIter::as_read_with_sidebands()], but for channels without side band support. + /// + /// Due to the preconfigured function type this method can be called without 'turbofish'. + #[allow(clippy::type_complexity)] + pub fn as_read(&mut self) -> WithSidebands<'_, T, fn(bool, &[u8]) -> ProgressAction> { + WithSidebands::new(self) + } +} diff --git a/gix-packetline-blocking/src/read/mod.rs b/gix-packetline-blocking/src/read/mod.rs new file mode 100644 index 00000000000..3c42f9e596d --- /dev/null +++ b/gix-packetline-blocking/src/read/mod.rs @@ -0,0 +1,130 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/mod.rs. Run `just copy-packetline` to update it. + +#[cfg(any(feature = "blocking-io", feature = "async-io"))] +use crate::MAX_LINE_LEN; +use crate::{PacketLineRef, StreamingPeekableIter, U16_HEX_BYTES}; + +/// Allow the read-progress handler to determine how to continue. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ProgressAction { + /// Continue reading the next progress if available. + Continue, + /// Abort all IO even if more would be available, claiming the operation was interrupted. + Interrupt, +} + +#[cfg(any(feature = "blocking-io", feature = "async-io"))] +type ExhaustiveOutcome<'a> = ( + bool, // is_done + Option>, // stopped_at + Option, crate::decode::Error>>>, // actual method result +); + +mod error { + use std::fmt::{Debug, Display, Formatter}; + + use bstr::BString; + + /// The error representing an ERR packet line, as possibly wrapped into an `std::io::Error` in + /// [`read_line(…)`][super::StreamingPeekableIter::read_line()]. + #[derive(Debug)] + pub struct Error { + /// The contents of the ERR line, with `ERR` portion stripped. + pub message: BString, + } + + impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.message, f) + } + } + + impl std::error::Error for Error {} +} +pub use error::Error; + +impl StreamingPeekableIter { + /// Return a new instance from `read` which will stop decoding packet lines when receiving one of the given `delimiters`. + /// If `trace` is `true`, all packetlines received or sent will be passed to the facilities of the `gix-trace` crate. + pub fn new(read: T, delimiters: &'static [PacketLineRef<'static>], trace: bool) -> Self { + StreamingPeekableIter { + read, + #[cfg(any(feature = "blocking-io", feature = "async-io"))] + buf: vec![0; MAX_LINE_LEN], + peek_buf: Vec::new(), + delimiters, + fail_on_err_lines: false, + is_done: false, + stopped_at: None, + trace, + } + } + + /// Modify the peek buffer, overwriting the byte at `position` with the given byte to `replace_with` while truncating + /// it to contain only bytes until the newly replaced `position`. + /// + /// This is useful if you would want to remove 'special bytes' hidden behind, say a NULL byte to disappear and allow + /// standard line readers to read the next line as usual. + /// + /// **Note** that `position` does not include the 4 bytes prefix (they are invisible outside the reader) + pub fn peek_buffer_replace_and_truncate(&mut self, position: usize, replace_with: u8) { + let position = position + U16_HEX_BYTES; + self.peek_buf[position] = replace_with; + + let new_len = position + 1; + self.peek_buf.truncate(new_len); + self.peek_buf[..4].copy_from_slice(&crate::encode::u16_to_hex((new_len) as u16)); + } + + /// Returns the packet line that stopped the iteration, or + /// `None` if the end wasn't reached yet, on EOF, or if [`fail_on_err_lines()`][StreamingPeekableIter::fail_on_err_lines()] was true. + pub fn stopped_at(&self) -> Option> { + self.stopped_at + } + + /// Reset all iteration state allowing to continue a stopped iteration that is not yet at EOF. + /// + /// This can happen once a delimiter is reached. + pub fn reset(&mut self) { + let delimiters = std::mem::take(&mut self.delimiters); + self.reset_with(delimiters); + } + + /// Similar to [`reset()`][StreamingPeekableIter::reset()] with support to changing the `delimiters`. + pub fn reset_with(&mut self, delimiters: &'static [PacketLineRef<'static>]) { + self.delimiters = delimiters; + self.is_done = false; + self.stopped_at = None; + } + + /// If `value` is `true` the provider will check for special `ERR` packet lines and stop iteration when one is encountered. + /// + /// Use [`stopped_at()]`[`StreamingPeekableIter::stopped_at()`] to inspect the cause of the end of the iteration. + /// ne + pub fn fail_on_err_lines(&mut self, value: bool) { + self.fail_on_err_lines = value; + } + + /// Replace the reader used with the given `read`, resetting all other iteration state as well. + pub fn replace(&mut self, read: T) -> T { + let prev = std::mem::replace(&mut self.read, read); + self.reset(); + self.fail_on_err_lines = false; + prev + } + + /// Return the inner read + pub fn into_inner(self) -> T { + self.read + } +} + +#[cfg(feature = "blocking-io")] +mod blocking_io; + +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +mod async_io; + +mod sidebands; +#[cfg(any(feature = "blocking-io", feature = "async-io"))] +pub use sidebands::WithSidebands; diff --git a/gix-packetline-blocking/src/read/sidebands/async_io.rs b/gix-packetline-blocking/src/read/sidebands/async_io.rs new file mode 100644 index 00000000000..43f25c49435 --- /dev/null +++ b/gix-packetline-blocking/src/read/sidebands/async_io.rs @@ -0,0 +1,383 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/async_io.rs. Run `just copy-packetline` to update it. + +use std::{ + future::Future, + pin::Pin, + task::{Context, Poll}, +}; + +use futures_io::{AsyncBufRead, AsyncRead}; +use futures_lite::ready; + +use crate::{decode, read::ProgressAction, BandRef, PacketLineRef, StreamingPeekableIter, TextRef, U16_HEX_BYTES}; + +type ReadLineResult<'a> = Option, decode::Error>>>; +/// An implementor of [`AsyncBufRead`] yielding packet lines on each call to [`read_line()`][AsyncBufRead::read_line()]. +/// It's also possible to hide the underlying packet lines using the [`Read`][AsyncRead] implementation which is useful +/// if they represent binary data, like the one of a pack file. +pub struct WithSidebands<'a, T, F> +where + T: AsyncRead, +{ + state: State<'a, T>, + handle_progress: Option, + pos: usize, + cap: usize, +} + +impl<'a, T, F> Drop for WithSidebands<'a, T, F> +where + T: AsyncRead, +{ + fn drop(&mut self) { + if let State::Idle { ref mut parent } = self.state { + parent + .as_mut() + .expect("parent is always available if we are idle") + .reset(); + } + } +} + +impl<'a, T> WithSidebands<'a, T, fn(bool, &[u8]) -> ProgressAction> +where + T: AsyncRead, +{ + /// Create a new instance with the given provider as `parent`. + pub fn new(parent: &'a mut StreamingPeekableIter) -> Self { + WithSidebands { + state: State::Idle { parent: Some(parent) }, + handle_progress: None, + pos: 0, + cap: 0, + } + } +} + +enum State<'a, T> { + Idle { + parent: Option<&'a mut StreamingPeekableIter>, + }, + ReadLine { + read_line: Pin> + 'a>>, + parent_inactive: Option<*mut StreamingPeekableIter>, + }, +} + +/// # SAFETY +/// It's safe because T is `Send` and we have a test that assures that our `StreamingPeekableIter` is `Send` as well, +/// hence the `*mut _` is `Send`. +/// `read_line` isn't send and we can't declare it as such as it forces `Send` in all places (BUT WHY IS THAT A PROBLEM, I don't recall). +/// However, it's only used when pinned and thus isn't actually sent anywhere, it's a secondary state of the future used after it was Send +/// to a thread possibly. +// TODO: Is it possible to declare it as it should be? +#[allow(unsafe_code, clippy::non_send_fields_in_send_ty)] +unsafe impl<'a, T> Send for State<'a, T> where T: Send {} + +impl<'a, T, F> WithSidebands<'a, T, F> +where + T: AsyncRead + Unpin, + F: FnMut(bool, &[u8]) -> ProgressAction + Unpin, +{ + /// Create a new instance with the given `parent` provider and the `handle_progress` function. + /// + /// Progress or error information will be passed to the given `handle_progress(is_error, text)` function, with `is_error: bool` + /// being true in case the `text` is to be interpreted as error. + pub fn with_progress_handler(parent: &'a mut StreamingPeekableIter, handle_progress: F) -> Self { + WithSidebands { + state: State::Idle { parent: Some(parent) }, + handle_progress: Some(handle_progress), + pos: 0, + cap: 0, + } + } + + /// Create a new instance without a progress handler. + pub fn without_progress_handler(parent: &'a mut StreamingPeekableIter) -> Self { + WithSidebands { + state: State::Idle { parent: Some(parent) }, + handle_progress: None, + pos: 0, + cap: 0, + } + } + + /// Forwards to the parent [`StreamingPeekableIter::reset_with()`] + pub fn reset_with(&mut self, delimiters: &'static [PacketLineRef<'static>]) { + if let State::Idle { ref mut parent } = self.state { + parent + .as_mut() + .expect("parent is always available if we are idle") + .reset_with(delimiters) + } + } + + /// Forwards to the parent [`StreamingPeekableIter::stopped_at()`] + pub fn stopped_at(&self) -> Option> { + match self.state { + State::Idle { ref parent } => { + parent + .as_ref() + .expect("parent is always available if we are idle") + .stopped_at + } + _ => None, + } + } + + /// Set or unset the progress handler. + pub fn set_progress_handler(&mut self, handle_progress: Option) { + self.handle_progress = handle_progress; + } + + /// Effectively forwards to the parent [`StreamingPeekableIter::peek_line()`], allowing to see what would be returned + /// next on a call to [`read_line()`][io::BufRead::read_line()]. + /// + /// # Warning + /// + /// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it. + pub async fn peek_data_line(&mut self) -> Option>> { + match self.state { + State::Idle { ref mut parent } => match parent + .as_mut() + .expect("parent is always available if we are idle") + .peek_line() + .await + { + Some(Ok(Ok(PacketLineRef::Data(line)))) => Some(Ok(Ok(line))), + Some(Ok(Err(err))) => Some(Ok(Err(err))), + Some(Err(err)) => Some(Err(err)), + _ => None, + }, + _ => None, + } + } + + /// Read a packet line as string line. + pub fn read_line_to_string<'b>(&'b mut self, buf: &'b mut String) -> ReadLineFuture<'a, 'b, T, F> { + ReadLineFuture { parent: self, buf } + } + + /// Read a packet line from the underlying packet reader, returning empty lines if a stop-packetline was reached. + /// + /// # Warning + /// + /// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it. + pub async fn read_data_line(&mut self) -> Option, decode::Error>>> { + match &mut self.state { + State::Idle { parent: Some(parent) } => { + assert_eq!( + self.cap, 0, + "we don't support partial buffers right now - read-line must be used consistently" + ); + parent.read_line().await + } + _ => None, + } + } +} + +pub struct ReadDataLineFuture<'a, 'b, T: AsyncRead, F> { + parent: &'b mut WithSidebands<'a, T, F>, + buf: &'b mut Vec, +} + +impl<'a, 'b, T, F> Future for ReadDataLineFuture<'a, 'b, T, F> +where + T: AsyncRead + Unpin, + F: FnMut(bool, &[u8]) -> ProgressAction + Unpin, +{ + type Output = std::io::Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + assert_eq!( + self.parent.cap, 0, + "we don't support partial buffers right now - read-line must be used consistently" + ); + let Self { buf, parent } = &mut *self; + let line = ready!(Pin::new(parent).poll_fill_buf(cx))?; + buf.clear(); + buf.extend_from_slice(line); + let bytes = line.len(); + self.parent.cap = 0; + Poll::Ready(Ok(bytes)) + } +} + +pub struct ReadLineFuture<'a, 'b, T: AsyncRead, F> { + parent: &'b mut WithSidebands<'a, T, F>, + buf: &'b mut String, +} + +impl<'a, 'b, T, F> Future for ReadLineFuture<'a, 'b, T, F> +where + T: AsyncRead + Unpin, + F: FnMut(bool, &[u8]) -> ProgressAction + Unpin, +{ + type Output = std::io::Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + assert_eq!( + self.parent.cap, 0, + "we don't support partial buffers right now - read-line must be used consistently" + ); + let Self { buf, parent } = &mut *self; + let line = std::str::from_utf8(ready!(Pin::new(parent).poll_fill_buf(cx))?) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + buf.clear(); + buf.push_str(line); + let bytes = line.len(); + self.parent.cap = 0; + Poll::Ready(Ok(bytes)) + } +} + +impl<'a, T, F> AsyncBufRead for WithSidebands<'a, T, F> +where + T: AsyncRead + Unpin, + F: FnMut(bool, &[u8]) -> ProgressAction + Unpin, +{ + fn poll_fill_buf(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + use std::io; + + use futures_lite::FutureExt; + { + let this = self.as_mut().get_mut(); + if this.pos >= this.cap { + let (ofs, cap) = loop { + match this.state { + State::Idle { ref mut parent } => { + let parent = parent.take().expect("parent to be present here"); + let inactive = parent as *mut _; + this.state = State::ReadLine { + read_line: parent.read_line().boxed_local(), + parent_inactive: Some(inactive), + } + } + State::ReadLine { + ref mut read_line, + ref mut parent_inactive, + } => { + let line = ready!(read_line.poll(cx)); + + this.state = { + let parent = parent_inactive.take().expect("parent pointer always set"); + // SAFETY: It's safe to recover the original mutable reference (from which + // the `read_line` future was created as the latter isn't accessible anymore + // once the state is set to Idle. In other words, either one or the other are + // accessible, never both at the same time. + // Also: We keep a pointer around which is protected by borrowcheck since it's created + // from a legal mutable reference which is moved into the read_line future - if it was manually + // implemented we would be able to re-obtain it from there. + #[allow(unsafe_code)] + let parent = unsafe { &mut *parent }; + State::Idle { parent: Some(parent) } + }; + + let line = match line { + Some(line) => line?.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?, + None => break (0, 0), + }; + + match this.handle_progress.as_mut() { + Some(handle_progress) => { + let band = line + .decode_band() + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + const ENCODED_BAND: usize = 1; + match band { + BandRef::Data(d) => { + if d.is_empty() { + continue; + } + break (U16_HEX_BYTES + ENCODED_BAND, d.len()); + } + BandRef::Progress(d) => { + let text = TextRef::from(d).0; + match handle_progress(false, text) { + ProgressAction::Continue => {} + ProgressAction::Interrupt => { + return Poll::Ready(Err(io::Error::new( + std::io::ErrorKind::Other, + "interrupted by user", + ))) + } + }; + } + BandRef::Error(d) => { + let text = TextRef::from(d).0; + match handle_progress(true, text) { + ProgressAction::Continue => {} + ProgressAction::Interrupt => { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::Other, + "interrupted by user", + ))) + } + }; + } + }; + } + None => { + break match line.as_slice() { + Some(d) => (U16_HEX_BYTES, d.len()), + None => { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "encountered non-data line in a data-line only context", + ))) + } + } + } + } + } + } + }; + this.cap = cap + ofs; + this.pos = ofs; + } + } + let range = self.pos..self.cap; + match &self.get_mut().state { + State::Idle { parent } => Poll::Ready(Ok(&parent.as_ref().expect("parent always available").buf[range])), + State::ReadLine { .. } => unreachable!("at least in theory"), + } + } + + fn consume(self: Pin<&mut Self>, amt: usize) { + let this = self.get_mut(); + this.pos = std::cmp::min(this.pos + amt, this.cap); + } +} + +impl<'a, T, F> AsyncRead for WithSidebands<'a, T, F> +where + T: AsyncRead + Unpin, + F: FnMut(bool, &[u8]) -> ProgressAction + Unpin, +{ + fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + use std::io::Read; + let mut rem = ready!(self.as_mut().poll_fill_buf(cx))?; + let nread = rem.read(buf)?; + self.consume(nread); + Poll::Ready(Ok(nread)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + fn receiver(_i: T) {} + + /// We want to declare items containing pointers of `StreamingPeekableIter` `Send` as well, so it must be `Send` itself. + #[test] + fn streaming_peekable_iter_is_send() { + receiver(StreamingPeekableIter::new(Vec::::new(), &[], false)); + } + + #[test] + fn state_is_send() { + let mut s = StreamingPeekableIter::new(Vec::::new(), &[], false); + receiver(State::Idle { parent: Some(&mut s) }); + } +} diff --git a/gix-packetline-blocking/src/read/sidebands/blocking_io.rs b/gix-packetline-blocking/src/read/sidebands/blocking_io.rs new file mode 100644 index 00000000000..c953ee4d26d --- /dev/null +++ b/gix-packetline-blocking/src/read/sidebands/blocking_io.rs @@ -0,0 +1,218 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/blocking_io.rs. Run `just copy-packetline` to update it. + +use std::{io, io::BufRead}; + +use crate::{read::ProgressAction, BandRef, PacketLineRef, StreamingPeekableIter, TextRef, U16_HEX_BYTES}; + +/// An implementor of [`BufRead`][io::BufRead] yielding packet lines on each call to [`read_line()`][io::BufRead::read_line()]. +/// It's also possible to hide the underlying packet lines using the [`Read`][io::Read] implementation which is useful +/// if they represent binary data, like the one of a pack file. +pub struct WithSidebands<'a, T, F> +where + T: io::Read, +{ + parent: &'a mut StreamingPeekableIter, + handle_progress: Option, + pos: usize, + cap: usize, +} + +impl<'a, T, F> Drop for WithSidebands<'a, T, F> +where + T: io::Read, +{ + fn drop(&mut self) { + self.parent.reset(); + } +} + +impl<'a, T> WithSidebands<'a, T, fn(bool, &[u8]) -> ProgressAction> +where + T: io::Read, +{ + /// Create a new instance with the given provider as `parent`. + pub fn new(parent: &'a mut StreamingPeekableIter) -> Self { + WithSidebands { + parent, + handle_progress: None, + pos: 0, + cap: 0, + } + } +} + +impl<'a, T, F> WithSidebands<'a, T, F> +where + T: io::Read, + F: FnMut(bool, &[u8]) -> ProgressAction, +{ + /// Create a new instance with the given `parent` provider and the `handle_progress` function. + /// + /// Progress or error information will be passed to the given `handle_progress(is_error, text)` function, with `is_error: bool` + /// being true in case the `text` is to be interpreted as error. + pub fn with_progress_handler(parent: &'a mut StreamingPeekableIter, handle_progress: F) -> Self { + WithSidebands { + parent, + handle_progress: Some(handle_progress), + pos: 0, + cap: 0, + } + } + + /// Create a new instance without a progress handler. + pub fn without_progress_handler(parent: &'a mut StreamingPeekableIter) -> Self { + WithSidebands { + parent, + handle_progress: None, + pos: 0, + cap: 0, + } + } + + /// Forwards to the parent [`StreamingPeekableIter::reset_with()`] + pub fn reset_with(&mut self, delimiters: &'static [PacketLineRef<'static>]) { + self.parent.reset_with(delimiters) + } + + /// Forwards to the parent [`StreamingPeekableIter::stopped_at()`] + pub fn stopped_at(&self) -> Option> { + self.parent.stopped_at + } + + /// Set or unset the progress handler. + pub fn set_progress_handler(&mut self, handle_progress: Option) { + self.handle_progress = handle_progress; + } + + /// Effectively forwards to the parent [`StreamingPeekableIter::peek_line()`], allowing to see what would be returned + /// next on a call to [`read_line()`][io::BufRead::read_line()]. + /// + /// # Warning + /// + /// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it. + pub fn peek_data_line(&mut self) -> Option>> { + match self.parent.peek_line() { + Some(Ok(Ok(PacketLineRef::Data(line)))) => Some(Ok(Ok(line))), + Some(Ok(Err(err))) => Some(Ok(Err(err))), + Some(Err(err)) => Some(Err(err)), + _ => None, + } + } + + /// Read a whole packetline from the underlying reader, with empty lines indicating a stop packetline. + /// + /// # Warning + /// + /// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it. + pub fn read_data_line(&mut self) -> Option, crate::decode::Error>>> { + assert_eq!( + self.cap, 0, + "we don't support partial buffers right now - read-line must be used consistently" + ); + self.parent.read_line() + } + + /// Like `BufRead::read_line()`, but will only read one packetline at a time. + /// + /// It will also be easier to call as sometimes it's unclear which implementation we get on a type like this with + /// plenty of generic parameters. + pub fn read_line_to_string(&mut self, buf: &mut String) -> io::Result { + assert_eq!( + self.cap, 0, + "we don't support partial buffers right now - read-line must be used consistently" + ); + let line = std::str::from_utf8(self.fill_buf()?).map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + buf.push_str(line); + let bytes = line.len(); + self.cap = 0; + Ok(bytes) + } +} + +impl<'a, T, F> BufRead for WithSidebands<'a, T, F> +where + T: io::Read, + F: FnMut(bool, &[u8]) -> ProgressAction, +{ + fn fill_buf(&mut self) -> io::Result<&[u8]> { + if self.pos >= self.cap { + let (ofs, cap) = loop { + let line = match self.parent.read_line() { + Some(line) => line?.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?, + None => break (0, 0), + }; + match self.handle_progress.as_mut() { + Some(handle_progress) => { + let band = line + .decode_band() + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + const ENCODED_BAND: usize = 1; + match band { + BandRef::Data(d) => { + if d.is_empty() { + continue; + } + break (U16_HEX_BYTES + ENCODED_BAND, d.len()); + } + BandRef::Progress(d) => { + let text = TextRef::from(d).0; + match handle_progress(false, text) { + ProgressAction::Continue => {} + ProgressAction::Interrupt => { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "interrupted by user", + )) + } + }; + } + BandRef::Error(d) => { + let text = TextRef::from(d).0; + match handle_progress(true, text) { + ProgressAction::Continue => {} + ProgressAction::Interrupt => { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "interrupted by user", + )) + } + }; + } + }; + } + None => { + break match line.as_slice() { + Some(d) => (U16_HEX_BYTES, d.len()), + None => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "encountered non-data line in a data-line only context", + )) + } + } + } + } + }; + self.cap = cap + ofs; + self.pos = ofs; + } + Ok(&self.parent.buf[self.pos..self.cap]) + } + + fn consume(&mut self, amt: usize) { + self.pos = std::cmp::min(self.pos + amt, self.cap); + } +} + +impl<'a, T, F> io::Read for WithSidebands<'a, T, F> +where + T: io::Read, + F: FnMut(bool, &[u8]) -> ProgressAction, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut rem = self.fill_buf()?; + let nread = rem.read(buf)?; + self.consume(nread); + Ok(nread) + } +} diff --git a/gix-packetline-blocking/src/read/sidebands/mod.rs b/gix-packetline-blocking/src/read/sidebands/mod.rs new file mode 100644 index 00000000000..605db0bbad9 --- /dev/null +++ b/gix-packetline-blocking/src/read/sidebands/mod.rs @@ -0,0 +1,11 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/mod.rs. Run `just copy-packetline` to update it. + +#[cfg(feature = "blocking-io")] +mod blocking_io; +#[cfg(feature = "blocking-io")] +pub use blocking_io::WithSidebands; + +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +mod async_io; +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +pub use async_io::WithSidebands; diff --git a/gix-packetline-blocking/src/write/async_io.rs b/gix-packetline-blocking/src/write/async_io.rs new file mode 100644 index 00000000000..77ef8069553 --- /dev/null +++ b/gix-packetline-blocking/src/write/async_io.rs @@ -0,0 +1,99 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/write/async_io.rs. Run `just copy-packetline` to update it. + +use std::{ + io, + pin::Pin, + task::{Context, Poll}, +}; + +use futures_io::AsyncWrite; + +use crate::{encode, MAX_DATA_LEN, U16_HEX_BYTES}; + +pin_project_lite::pin_project! { + /// An implementor of [`Write`][io::Write] which passes all input to an inner `Write` in packet line data encoding, + /// one line per `write(…)` call or as many lines as it takes if the data doesn't fit into the maximum allowed line length. + pub struct Writer { + #[pin] + inner: encode::LineWriter<'static, T>, + state: State, + } +} + +enum State { + Idle, + WriteData(usize), +} + +impl Writer { + /// Create a new instance from the given `write` + pub fn new(write: T) -> Self { + Writer { + inner: encode::LineWriter::new(write, &[], &[]), + state: State::Idle, + } + } + + /// Return the inner writer, consuming self. + pub fn into_inner(self) -> T { + self.inner.into_inner() + } + + /// Return a mutable reference to the inner writer, useful if packet lines should be serialized directly. + pub fn inner_mut(&mut self) -> &mut T { + &mut self.inner.writer + } +} + +/// Non-IO methods +impl Writer { + /// If called, each call to [`write()`][io::Write::write()] will write bytes as is. + pub fn enable_binary_mode(&mut self) { + self.inner.suffix = &[]; + } + /// If called, each call to [`write()`][io::Write::write()] will write the input as text, appending a trailing newline + /// if needed before writing. + pub fn enable_text_mode(&mut self) { + self.inner.suffix = &[b'\n']; + } +} + +impl AsyncWrite for Writer { + fn poll_write(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &[u8]) -> Poll> { + let mut this = self.project(); + loop { + match this.state { + State::Idle => { + if buf.is_empty() { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::Other, + "empty packet lines are not permitted as '0004' is invalid", + ))); + } + *this.state = State::WriteData(0) + } + State::WriteData(written) => { + while *written != buf.len() { + let data = &buf[*written..*written + (buf.len() - *written).min(MAX_DATA_LEN)]; + let n = futures_lite::ready!(this.inner.as_mut().poll_write(cx, data))?; + if n == 0 { + return Poll::Ready(Err(io::ErrorKind::WriteZero.into())); + } + *written += n; + *written -= U16_HEX_BYTES + this.inner.suffix.len(); + } + *this.state = State::Idle; + return Poll::Ready(Ok(buf.len())); + } + } + } + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.project().inner.poll_flush(cx) + } + + fn poll_close(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.project().inner.poll_close(cx) + } +} diff --git a/gix-packetline-blocking/src/write/blocking_io.rs b/gix-packetline-blocking/src/write/blocking_io.rs new file mode 100644 index 00000000000..4fb1192b7cc --- /dev/null +++ b/gix-packetline-blocking/src/write/blocking_io.rs @@ -0,0 +1,73 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/write/blocking_io.rs. Run `just copy-packetline` to update it. + +use std::io; + +use crate::{MAX_DATA_LEN, U16_HEX_BYTES}; + +/// An implementor of [`Write`][io::Write] which passes all input to an inner `Write` in packet line data encoding, +/// one line per `write(…)` call or as many lines as it takes if the data doesn't fit into the maximum allowed line length. +pub struct Writer { + /// the `Write` implementation to which to propagate packet lines + inner: T, + binary: bool, +} + +impl Writer { + /// Create a new instance from the given `write` + pub fn new(write: T) -> Self { + Writer { + inner: write, + binary: true, + } + } +} + +/// Non-IO methods +impl Writer { + /// If called, each call to [`write()`][io::Write::write()] will write bytes as is. + pub fn enable_binary_mode(&mut self) { + self.binary = true; + } + /// If called, each call to [`write()`][io::Write::write()] will write the input as text, appending a trailing newline + /// if needed before writing. + pub fn enable_text_mode(&mut self) { + self.binary = false; + } + /// Return the inner writer, consuming self. + pub fn into_inner(self) -> T { + self.inner + } + /// Return a mutable reference to the inner writer, useful if packet lines should be serialized directly. + pub fn inner_mut(&mut self) -> &mut T { + &mut self.inner + } +} + +impl io::Write for Writer { + fn write(&mut self, mut buf: &[u8]) -> io::Result { + if buf.is_empty() { + return Err(io::Error::new( + io::ErrorKind::Other, + "empty packet lines are not permitted as '0004' is invalid", + )); + } + + let mut written = 0; + while !buf.is_empty() { + let (data, rest) = buf.split_at(buf.len().min(MAX_DATA_LEN)); + written += if self.binary { + crate::encode::data_to_write(data, &mut self.inner) + } else { + crate::encode::text_to_write(data, &mut self.inner) + }?; + // subtract header (and trailing NL) because write-all can't handle writing more than it passes in + written -= U16_HEX_BYTES + usize::from(!self.binary); + buf = rest; + } + Ok(written) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} diff --git a/gix-packetline-blocking/src/write/mod.rs b/gix-packetline-blocking/src/write/mod.rs new file mode 100644 index 00000000000..9a4eaecf68b --- /dev/null +++ b/gix-packetline-blocking/src/write/mod.rs @@ -0,0 +1,23 @@ +//! DO NOT EDIT - this is a copy of gix-packetline/src/write/mod.rs. Run `just copy-packetline` to update it. + +use crate::Writer; + +#[cfg(all(not(feature = "blocking-io"), feature = "async-io"))] +pub(crate) mod async_io; + +#[cfg(feature = "blocking-io")] +pub(crate) mod blocking_io; + +/// Common methods +impl Writer { + /// As [`enable_text_mode()`][Writer::enable_text_mode()], but suitable for chaining. + pub fn text_mode(mut self) -> Self { + self.enable_text_mode(); + self + } + /// As [`enable_binary_mode()`][Writer::enable_binary_mode()], but suitable for chaining. + pub fn binary_mode(mut self) -> Self { + self.enable_binary_mode(); + self + } +} From 93c014920d0b0dee19f029f0a5e2e380e509ea3e Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 22:06:21 -0400 Subject: [PATCH 14/19] Show full diffs on CI So when gix-packetline-blocking/src was not updated, the diff from running the script is shown in full. Right now this is to investigate how the script does not seem to work on macOS, but it may be useful to keep it. --- .github/workflows/ci.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5da9b23a3cd..872a37e5fb3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -230,8 +230,14 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check that working tree is initially clean - run: git status; set -x; git diff --quiet + run: | + set -x + git status + git diff --exit-code - name: Regenerate gix-packetline-blocking/src run: etc/copy-packetline.sh - name: Check that gix-packetline-blocking/src was already up to date - run: git status; set -x; git diff --quiet + run: | + set -x + git status + git diff --exit-code From f2b3ae6293046ba7dcc9d5142f811837340e3dd6 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 8 Apr 2024 22:37:55 -0400 Subject: [PATCH 15/19] Don't use a trailing slash on the find root path While a command like `find foo` should add a `/` to make paths like `foo/bar`, some find implementations, including in macOS, add their own second slash even when there already was one, so `find foo/` outputs paths like `foo//bar`. (The current POSIX standard seems to forbid this, but it seems early standards did not, and in any case it is the behavior of find even on recent macOS systems.) That broke the copy-packetline.sh script on macOS: - Unsightly extra slashes were added to the headers placed in the copied/generated files inside gix-packetline-blocking/src. - Due to this happening on macOS, the script produced different output on macOS and other systems, preventing a precise portable CI check that the worktree remains clean even after running it. This commit addresses that by not adding a `/` to the path anymore, and making the required slight change to how the source prefix is removed (to be replaced by the target prefix) to accommodate this. That prefix removal was the original reason I had added a `/` originally. It served two purposes: to make the nature of the replacement slightly clearer to human readers, and to safeguard against a prefix that was not a whole path component. The first goal remains valuable but the benefit was slight, so it's okay to lose out on that. The second is no longer needed, because it was related to a way the script was more complex and thus more prone to to error at that time; the associated check was already removed. --- etc/copy-packetline.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 6f08d9a5e34..56e8e309c24 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -111,7 +111,7 @@ function generate_one () { local source_file target_file source_file="$1" - target_file="$target_dir/${source_file#"$source_dir"/}" + target_file="$target_dir${source_file#"$source_dir"}" if test -d "$source_file"; then mkdir -p -- "$target_file" @@ -144,7 +144,7 @@ function generate_all () { fail 'unable to remove target location' fi - find "$source_dir/" -print0 | while IFS= read -r -d '' source_file; do + find "$source_dir" -print0 | while IFS= read -r -d '' source_file; do generate_one "$source_file" done } From 6a5db0c5172342b48fa2fc7af2a92d59f3ce700f Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 9 Apr 2024 02:37:55 -0400 Subject: [PATCH 16/19] Create gix-packetline-blocking headers as regular comments See https://github.com/Byron/gitoxide/pull/1340#discussion_r1556918118. --- etc/copy-packetline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 56e8e309c24..60af1c4e2c6 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -87,7 +87,7 @@ function make_header () { endline="$2" # shellcheck disable=SC2016 # The backticks are intentionally literal. - printf '//! DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \ + printf '// DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \ "$source_file" "$endline" "$endline" } From ec2427fc78df81901a5636fb1213999a3a5ed969 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 9 Apr 2024 02:40:12 -0400 Subject: [PATCH 17/19] Rerun script to get `//` instead of `//!` comments This reruns `just copy-packetline`, turning the comments in files in gix-packetline-blocking/src from doc comments to regular ones. --- gix-packetline-blocking/src/decode.rs | 2 +- gix-packetline-blocking/src/encode/async_io.rs | 2 +- gix-packetline-blocking/src/encode/blocking_io.rs | 2 +- gix-packetline-blocking/src/encode/mod.rs | 2 +- gix-packetline-blocking/src/lib.rs | 2 +- gix-packetline-blocking/src/line/async_io.rs | 2 +- gix-packetline-blocking/src/line/blocking_io.rs | 2 +- gix-packetline-blocking/src/line/mod.rs | 2 +- gix-packetline-blocking/src/read/async_io.rs | 2 +- gix-packetline-blocking/src/read/blocking_io.rs | 2 +- gix-packetline-blocking/src/read/mod.rs | 2 +- gix-packetline-blocking/src/read/sidebands/async_io.rs | 2 +- gix-packetline-blocking/src/read/sidebands/blocking_io.rs | 2 +- gix-packetline-blocking/src/read/sidebands/mod.rs | 2 +- gix-packetline-blocking/src/write/async_io.rs | 2 +- gix-packetline-blocking/src/write/blocking_io.rs | 2 +- gix-packetline-blocking/src/write/mod.rs | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/gix-packetline-blocking/src/decode.rs b/gix-packetline-blocking/src/decode.rs index c814a46c8cd..cde0a58beed 100644 --- a/gix-packetline-blocking/src/decode.rs +++ b/gix-packetline-blocking/src/decode.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it. use bstr::BString; diff --git a/gix-packetline-blocking/src/encode/async_io.rs b/gix-packetline-blocking/src/encode/async_io.rs index 45226351553..384ec856f5e 100644 --- a/gix-packetline-blocking/src/encode/async_io.rs +++ b/gix-packetline-blocking/src/encode/async_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/async_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/encode/async_io.rs. Run `just copy-packetline` to update it. use std::{ io, diff --git a/gix-packetline-blocking/src/encode/blocking_io.rs b/gix-packetline-blocking/src/encode/blocking_io.rs index 244a14650cc..370ab992204 100644 --- a/gix-packetline-blocking/src/encode/blocking_io.rs +++ b/gix-packetline-blocking/src/encode/blocking_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/blocking_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/encode/blocking_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/encode/mod.rs b/gix-packetline-blocking/src/encode/mod.rs index 56da585a018..238957cc4ca 100644 --- a/gix-packetline-blocking/src/encode/mod.rs +++ b/gix-packetline-blocking/src/encode/mod.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/encode/mod.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/encode/mod.rs. Run `just copy-packetline` to update it. use crate::MAX_DATA_LEN; diff --git a/gix-packetline-blocking/src/lib.rs b/gix-packetline-blocking/src/lib.rs index 51736330ddd..8b7ae4479d5 100644 --- a/gix-packetline-blocking/src/lib.rs +++ b/gix-packetline-blocking/src/lib.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/lib.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/lib.rs. Run `just copy-packetline` to update it. //! Read and write the git packet line wire format without copying it. //! diff --git a/gix-packetline-blocking/src/line/async_io.rs b/gix-packetline-blocking/src/line/async_io.rs index 65a5756d4d7..3a631c44c9a 100644 --- a/gix-packetline-blocking/src/line/async_io.rs +++ b/gix-packetline-blocking/src/line/async_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/line/async_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/line/async_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/line/blocking_io.rs b/gix-packetline-blocking/src/line/blocking_io.rs index 5d698c1536c..0354e3de77a 100644 --- a/gix-packetline-blocking/src/line/blocking_io.rs +++ b/gix-packetline-blocking/src/line/blocking_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/line/blocking_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/line/blocking_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/line/mod.rs b/gix-packetline-blocking/src/line/mod.rs index 9e27f36e4de..d584f0d3a8e 100644 --- a/gix-packetline-blocking/src/line/mod.rs +++ b/gix-packetline-blocking/src/line/mod.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/line/mod.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/line/mod.rs. Run `just copy-packetline` to update it. use bstr::BStr; diff --git a/gix-packetline-blocking/src/read/async_io.rs b/gix-packetline-blocking/src/read/async_io.rs index d35b2a5b08f..a0d347d84c7 100644 --- a/gix-packetline-blocking/src/read/async_io.rs +++ b/gix-packetline-blocking/src/read/async_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/async_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/async_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/read/blocking_io.rs b/gix-packetline-blocking/src/read/blocking_io.rs index 7857d2feb88..ce2035a1a3e 100644 --- a/gix-packetline-blocking/src/read/blocking_io.rs +++ b/gix-packetline-blocking/src/read/blocking_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/blocking_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/blocking_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/read/mod.rs b/gix-packetline-blocking/src/read/mod.rs index 3c42f9e596d..386fa492b80 100644 --- a/gix-packetline-blocking/src/read/mod.rs +++ b/gix-packetline-blocking/src/read/mod.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/mod.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/mod.rs. Run `just copy-packetline` to update it. #[cfg(any(feature = "blocking-io", feature = "async-io"))] use crate::MAX_LINE_LEN; diff --git a/gix-packetline-blocking/src/read/sidebands/async_io.rs b/gix-packetline-blocking/src/read/sidebands/async_io.rs index 43f25c49435..0582ea11ed8 100644 --- a/gix-packetline-blocking/src/read/sidebands/async_io.rs +++ b/gix-packetline-blocking/src/read/sidebands/async_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/async_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/async_io.rs. Run `just copy-packetline` to update it. use std::{ future::Future, diff --git a/gix-packetline-blocking/src/read/sidebands/blocking_io.rs b/gix-packetline-blocking/src/read/sidebands/blocking_io.rs index c953ee4d26d..c4dabb093a1 100644 --- a/gix-packetline-blocking/src/read/sidebands/blocking_io.rs +++ b/gix-packetline-blocking/src/read/sidebands/blocking_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/blocking_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/blocking_io.rs. Run `just copy-packetline` to update it. use std::{io, io::BufRead}; diff --git a/gix-packetline-blocking/src/read/sidebands/mod.rs b/gix-packetline-blocking/src/read/sidebands/mod.rs index 605db0bbad9..d9ff58f7809 100644 --- a/gix-packetline-blocking/src/read/sidebands/mod.rs +++ b/gix-packetline-blocking/src/read/sidebands/mod.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/mod.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/read/sidebands/mod.rs. Run `just copy-packetline` to update it. #[cfg(feature = "blocking-io")] mod blocking_io; diff --git a/gix-packetline-blocking/src/write/async_io.rs b/gix-packetline-blocking/src/write/async_io.rs index 77ef8069553..7b76c011fa9 100644 --- a/gix-packetline-blocking/src/write/async_io.rs +++ b/gix-packetline-blocking/src/write/async_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/write/async_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/write/async_io.rs. Run `just copy-packetline` to update it. use std::{ io, diff --git a/gix-packetline-blocking/src/write/blocking_io.rs b/gix-packetline-blocking/src/write/blocking_io.rs index 4fb1192b7cc..6a302ea6de7 100644 --- a/gix-packetline-blocking/src/write/blocking_io.rs +++ b/gix-packetline-blocking/src/write/blocking_io.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/write/blocking_io.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/write/blocking_io.rs. Run `just copy-packetline` to update it. use std::io; diff --git a/gix-packetline-blocking/src/write/mod.rs b/gix-packetline-blocking/src/write/mod.rs index 9a4eaecf68b..73585e91f47 100644 --- a/gix-packetline-blocking/src/write/mod.rs +++ b/gix-packetline-blocking/src/write/mod.rs @@ -1,4 +1,4 @@ -//! DO NOT EDIT - this is a copy of gix-packetline/src/write/mod.rs. Run `just copy-packetline` to update it. +// DO NOT EDIT - this is a copy of gix-packetline/src/write/mod.rs. Run `just copy-packetline` to update it. use crate::Writer; From d25e5d3781e0faca0b31bb6ea58d3057b3a5f16a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 9 Apr 2024 17:14:20 +0200 Subject: [PATCH 18/19] refactor - avoid `target` prefix as it reminds me of `target/` too much - run script on only one platform on CI, with explanation - make CI failures an error --- .github/workflows/ci.yml | 7 ++++--- etc/copy-packetline.sh | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 872a37e5fb3..52e3b5b93d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -220,9 +220,10 @@ jobs: matrix: os: - ubuntu-latest - - macos-latest - - windows-latest - continue-on-error: true + # We consider this script read-only and its effect is the same everywhere. + # However, when changes are made to `etc/copy-packetline.sh`, re-enable the other platforms for testing. + # - macos-latest + # - windows-latest runs-on: ${{ matrix.os }} defaults: run: diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index 60af1c4e2c6..c4f5f588b19 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -3,8 +3,8 @@ set -euC -o pipefail readonly source_dir='gix-packetline/src' -readonly target_parent_dir='gix-packetline-blocking' -readonly target_dir="$target_parent_dir/src" +readonly destination_parent_dir='gix-packetline-blocking' +readonly destination_dir="$destination_parent_dir/src" function fail () { printf '%s: error: %s\n' "$0" "$1" >&2 @@ -34,12 +34,12 @@ function merging () { } function target_dir_status () { - git status --porcelain --ignored=traditional -- "$target_dir" || + git status --porcelain --ignored=traditional -- "$destination_dir" || fail 'git-status failed' } -function check_target_dir () { - if ! test -e "$target_dir"; then +function check_destination_dir () { + if ! test -e "$destination_dir"; then # The target does not exist on disk, so nothing will be lost. Proceed. return fi @@ -111,7 +111,7 @@ function generate_one () { local source_file target_file source_file="$1" - target_file="$target_dir${source_file#"$source_dir"}" + target_file="$destination_dir${source_file#"$source_dir"}" if test -d "$source_file"; then mkdir -p -- "$target_file" @@ -134,13 +134,13 @@ function generate_all () { if ! test -d "$source_dir"; then fail "no source directory: $source_dir" fi - if ! test -d "$target_parent_dir"; then - fail "no target parent directory: $target_parent_dir" + if ! test -d "$destination_parent_dir"; then + fail "no target parent directory: $destination_parent_dir" fi - check_target_dir + check_destination_dir - rm -rf -- "$target_dir" # It may be a directory, symlink, or regular file. - if test -e "$target_dir"; then + rm -rf -- "$destination_dir" # It may be a directory, symlink, or regular file. + if test -e "$destination_dir"; then fail 'unable to remove target location' fi From ebb6ef5142185192c6aa7de04640ad678bff6c3d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 9 Apr 2024 12:20:43 -0400 Subject: [PATCH 19/19] Use "input" and "output" as names in script; tweak CI In the copy-packetline.sh script: - Rename all identifiers with "source" to "input", and with "destination" or "target" to "output". This includes variables and functions. - Adjust messages accordingly. This is not a simple search and replace, since sometimes "target" becomes "output location" (to avoid giving the impression that it must always being as a dir). - Adjust comments accordingly, but use the word "destination" in a few comments that had said "target", where it's clear it applies to "output" and seems to be clearer wording than "output". - A couple very small unrelated comment rephrasings for clarity. In the copy-packetline job definition in ci.yml: - Put `fail-fast: false`, which has no effect on a matrix that only generates one job, but will aid in producing results for all three platforms if the other two have to be reenabled to test a change to the script. This is what I had been using `continue-on-error` for, but `fail-fast: false` is the better way to do it (and also avoids confusion with the other meanings of `continue-on-error`). - Tweak indentation so that removing the leading `# ` on the macos-latest and windows-latest entries would place them in the list (so that if they ever have to be reenabled to test changes to the script, then this will be slightly easier). --- .github/workflows/ci.yml | 5 +- etc/copy-packetline.sh | 99 ++++++++++++++++++++-------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 52e3b5b93d7..285afbbb5e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,13 +217,14 @@ jobs: check-packetline: strategy: + fail-fast: false matrix: os: - ubuntu-latest # We consider this script read-only and its effect is the same everywhere. # However, when changes are made to `etc/copy-packetline.sh`, re-enable the other platforms for testing. - # - macos-latest - # - windows-latest + # - macos-latest + # - windows-latest runs-on: ${{ matrix.os }} defaults: run: diff --git a/etc/copy-packetline.sh b/etc/copy-packetline.sh index c4f5f588b19..9520b3b9011 100755 --- a/etc/copy-packetline.sh +++ b/etc/copy-packetline.sh @@ -2,9 +2,9 @@ set -euC -o pipefail -readonly source_dir='gix-packetline/src' -readonly destination_parent_dir='gix-packetline-blocking' -readonly destination_dir="$destination_parent_dir/src" +readonly input_dir='gix-packetline/src' +readonly output_parent_dir='gix-packetline-blocking' +readonly output_dir="$output_parent_dir/src" function fail () { printf '%s: error: %s\n' "$0" "$1" >&2 @@ -14,7 +14,7 @@ function fail () { function chdir_toplevel () { local root_padded root - # Find the working tree's root. (Padding is for the trailing-newline case.) + # Find the working tree's root. (Padding covers the trailing-newline case.) root_padded="$(git rev-parse --show-toplevel && echo -n .)" || fail 'git-rev-parse failed to find top-level dir' root="${root_padded%$'\n.'}" @@ -25,7 +25,7 @@ function chdir_toplevel () { function merging () { local git_dir_padded git_dir - # Find the .git directory. (Padding is for the trailing-newline case.) + # Find the .git directory. (Padding covers the trailing-newline case.) git_dir_padded="$(git rev-parse --git-dir && echo -n .)" || fail 'git-rev-parse failed to find git dir' git_dir="${git_dir_padded%$'\n.'}" @@ -33,30 +33,30 @@ function merging () { test -e "$git_dir/MERGE_HEAD" } -function target_dir_status () { - git status --porcelain --ignored=traditional -- "$destination_dir" || +function output_dir_status () { + git status --porcelain --ignored=traditional -- "$output_dir" || fail 'git-status failed' } -function check_destination_dir () { - if ! test -e "$destination_dir"; then - # The target does not exist on disk, so nothing will be lost. Proceed. +function check_output_dir () { + if ! test -e "$output_dir"; then + # The destination does not exist on disk, so nothing will be lost. Proceed. return fi if merging; then - # In a merge, it would be confusing to replace anything at the target. - if target_dir_status | grep -q '^'; then - fail 'target exists, and a merge is in progress' + # In a merge, it would be confusing to replace anything at the destination. + if output_dir_status | grep -q '^'; then + fail 'output location exists, and a merge is in progress' fi else - # We can lose data if anything of value at the target is not in the index. - # (This includes unstaged deletions, for two reasons. One is that we could - # lose track of which files had been deleted. More importantly, replacing a + # We can lose data if anything of value at the destination is not in the + # index. (This includes unstaged deletions, for two reasons. We could lose + # track of which files had been deleted. More importantly, replacing a # staged symlink or regular file with an unstaged directory is shown by # git-status as only a deletion, even if the directory is non-empty.) - if target_dir_status | grep -q '^.[^ ]'; then - fail 'target exists, with unstaged changes or ignored files' + if output_dir_status | grep -q '^.[^ ]'; then + fail 'output location exists, with unstaged changes or ignored files' fi fi } @@ -81,71 +81,70 @@ function first_line_ends_crlf () { } function make_header () { - local source_file endline + local input_file endline - source_file="$1" + input_file="$1" endline="$2" # shellcheck disable=SC2016 # The backticks are intentionally literal. printf '// DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \ - "$source_file" "$endline" "$endline" + "$input_file" "$endline" "$endline" } function copy_with_header () { - local source_file target_file endline + local input_file output_file endline - source_file="$1" - target_file="$2" + input_file="$1" + output_file="$2" - if first_line_ends_crlf "$source_file"; then + if first_line_ends_crlf "$input_file"; then endline=$'\r\n' else endline=$'\n' fi - make_header "$source_file" "$endline" | - cat -- - "$source_file" >"$target_file" + make_header "$input_file" "$endline" | cat -- - "$input_file" >"$output_file" } function generate_one () { - local source_file target_file + local input_file output_file - source_file="$1" - target_file="$destination_dir${source_file#"$source_dir"}" + input_file="$1" + output_file="$output_dir${input_file#"$input_dir"}" - if test -d "$source_file"; then - mkdir -p -- "$target_file" - elif test -L "$source_file"; then + if test -d "$input_file"; then + mkdir -p -- "$output_file" + elif test -L "$input_file"; then # Cover this case separately, for more useful error messages. - fail "source file is symbolic link: $source_file" - elif ! test -f "$source_file"; then + fail "input file is symbolic link: $input_file" + elif ! test -f "$input_file"; then # This covers less common kinds of files we can't or shouldn't process. - fail "source file neither regular file nor directory: $source_file" - elif [[ "$source_file" =~ \.rs$ ]]; then - copy_with_header "$source_file" "$target_file" + fail "input file neither regular file nor directory: $input_file" + elif [[ "$input_file" =~ \.rs$ ]]; then + copy_with_header "$input_file" "$output_file" else - fail "source file not named as Rust source code: $source_file" + fail "input file not named as Rust source code: $input_file" fi } function generate_all () { - local source_file + local input_file - if ! test -d "$source_dir"; then - fail "no source directory: $source_dir" + if ! test -d "$input_dir"; then + fail "no input directory: $input_dir" fi - if ! test -d "$destination_parent_dir"; then - fail "no target parent directory: $destination_parent_dir" + if ! test -d "$output_parent_dir"; then + fail "no output parent directory: $output_parent_dir" fi - check_destination_dir + check_output_dir - rm -rf -- "$destination_dir" # It may be a directory, symlink, or regular file. - if test -e "$destination_dir"; then - fail 'unable to remove target location' + rm -rf -- "$output_dir" # It may be a directory, symlink, or regular file. + if test -e "$output_dir"; then + fail 'unable to remove output location' fi - find "$source_dir" -print0 | while IFS= read -r -d '' source_file; do - generate_one "$source_file" + find "$input_dir" -print0 | while IFS= read -r -d '' input_file; do + generate_one "$input_file" done }