From eb8b8287a36fd945f21706012e40a73ed2695eb2 Mon Sep 17 00:00:00 2001 From: noahmmcgivern Date: Wed, 20 Dec 2023 16:19:47 -0500 Subject: [PATCH 1/2] Documentation updates and linting --- .github/workflows/release.yml | 7 +- Cargo.toml | 1 + Dockerfile.build => Dockerfile.builder | 0 Makefile | 60 +++--- README.md | 246 ++++++++++++++----------- crates/rusty-hog-scanner/src/lib.rs | 18 +- src/aws_scanning.rs | 4 +- src/bin/ankamali_hog.rs | 14 +- src/bin/berkshire_hog.rs | 2 +- src/bin/berkshire_hog_lambda.rs | 4 +- src/bin/choctaw_hog.rs | 4 +- src/bin/duroc_hog.rs | 11 +- src/bin/essex_hog.rs | 4 +- src/bin/gottingen_hog.rs | 2 +- src/bin/hante_hog.rs | 75 +++++--- src/git_scanning.rs | 9 +- src/google_scanning.rs | 6 +- 17 files changed, 264 insertions(+), 203 deletions(-) rename Dockerfile.build => Dockerfile.builder (100%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8b48209..0a71b30 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -97,7 +97,8 @@ jobs: shell: bash run: | mkdir dist - cp ./target/x86_64-unknown-linux-musl/release/*_lambda dist + cp ./target/x86_64-unknown-linux-musl/release/berkshire_hog_lambda bootstrap + zip dist/berkshire_hog_lambda.zip bootstrap - uses: actions/upload-artifact@v3 with: name: bins-lambda @@ -119,10 +120,10 @@ jobs: echo "tag=${name//v}" >> "$GITHUB_OUTPUT" - name: Build Docker Images shell: bash - run: make docker-build VERSION=${{ steps.tagname.outputs.tag }} + run: make docker-build - name: Save Docker Images shell: bash - run: make docker-save VERSION=${{ steps.tagname.outputs.tag }} + run: make docker-save - uses: actions/upload-artifact@v3 with: name: docker diff --git a/Cargo.toml b/Cargo.toml index 0f108b0..e681efd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ flate2 = "1.0" tempfile = "3.2" path-clean = "0.1.0" anyhow = "1.0" +openssl = { version = "0.10", features = ["vendored"] } [dev-dependencies] escargot = "0.5.0" diff --git 
a/Dockerfile.build b/Dockerfile.builder similarity index 100% rename from Dockerfile.build rename to Dockerfile.builder diff --git a/Makefile b/Makefile index c876a36..37df482 100644 --- a/Makefile +++ b/Makefile @@ -1,38 +1,48 @@ null: @: -docker-build: check-version - @echo Building Rusty Hogs version: $(VERSION) - docker build --tag rust-builder -f Dockerfile.build . - docker build --tag wetfeet2000/ankamali_hog:$(VERSION) --build-arg HOG=ankamali -f Dockerfile.hog . - docker build --tag wetfeet2000/berkshire_hog:$(VERSION) --build-arg HOG=berkshire -f Dockerfile.hog . - docker build --tag wetfeet2000/choctaw_hog:$(VERSION) --build-arg HOG=choctaw -f Dockerfile.hog . - docker build --tag wetfeet2000/duroc_hog:$(VERSION) --build-arg HOG=duroc -f Dockerfile.hog . - docker build --tag wetfeet2000/essex_hog:$(VERSION) --build-arg HOG=essex -f Dockerfile.hog . - docker build --tag wetfeet2000/gottingen_hog:$(VERSION) --build-arg HOG=gottingen -f Dockerfile.hog . - docker build --tag wetfeet2000/hante_hog:$(VERSION) --build-arg HOG=hante -f Dockerfile.hog . +docker-build: + @echo Building Rusty Hog Docker Images + docker build --tag rust-builder -f Dockerfile.builder . + docker build --tag wetfeet2000/ankamali_hog --build-arg HOG=ankamali -f Dockerfile.hog . + docker build --tag wetfeet2000/berkshire_hog --build-arg HOG=berkshire -f Dockerfile.hog . + docker build --tag wetfeet2000/choctaw_hog --build-arg HOG=choctaw -f Dockerfile.hog . + docker build --tag wetfeet2000/duroc_hog --build-arg HOG=duroc -f Dockerfile.hog . + docker build --tag wetfeet2000/essex_hog --build-arg HOG=essex -f Dockerfile.hog . + docker build --tag wetfeet2000/gottingen_hog --build-arg HOG=gottingen -f Dockerfile.hog . + docker build --tag wetfeet2000/hante_hog --build-arg HOG=hante -f Dockerfile.hog . 
-docker-save: check-version +docker-save: + @echo Saving Rusty Hog Docker Images to archive docker image save -o images.tar \ - wetfeet2000/ankamali_hog:$(VERSION) \ - wetfeet2000/berkshire_hog:$(VERSION) \ - wetfeet2000/choctaw_hog:$(VERSION) \ - wetfeet2000/duroc_hog:$(VERSION) \ - wetfeet2000/essex_hog:$(VERSION) \ - wetfeet2000/gottingen_hog:$(VERSION) \ - wetfeet2000/hante_hog:$(VERSION) + wetfeet2000/ankamali_hog \ + wetfeet2000/berkshire_hog \ + wetfeet2000/choctaw_hog \ + wetfeet2000/duroc_hog \ + wetfeet2000/essex_hog \ + wetfeet2000/gottingen_hog \ + wetfeet2000/hante_hog docker-load: + @echo Loading Rusty Hog Docker Images from archive docker load -i images.tar docker-publish: check-version - docker push wetfeet2000/ankamali_hog:$(VERSION) - docker push wetfeet2000/berkshire_hog:$(VERSION) - docker push wetfeet2000/choctaw_hog:$(VERSION) - docker push wetfeet2000/duroc_hog:$(VERSION) - docker push wetfeet2000/essex_hog:$(VERSION) - docker push wetfeet2000/gottingen_hog:$(VERSION) - docker push wetfeet2000/hante_hog:$(VERSION) + @echo Publishing Rusty Hog Docker Images version: $(VERSION) + docker tag wetfeet2000/ankamali_hog:latest wetfeet2000/ankamali_hog:$(VERSION) + docker tag wetfeet2000/berkshire_hog:latest wetfeet2000/berkshire_hog:$(VERSION) + docker tag wetfeet2000/choctaw_hog:latest wetfeet2000/choctaw_hog:$(VERSION) + docker tag wetfeet2000/duroc_hog:latest wetfeet2000/duroc_hog:$(VERSION) + docker tag wetfeet2000/essex_hog:latest wetfeet2000/essex_hog:$(VERSION) + docker tag wetfeet2000/gottingen_hog:latest wetfeet2000/gottingen_hog:$(VERSION) + docker tag wetfeet2000/hante_hog:latest wetfeet2000/hante_hog:$(VERSION) + docker push -a wetfeet2000/ankamali_hog + docker push -a wetfeet2000/berkshire_hog + docker push -a wetfeet2000/choctaw_hog + docker push -a wetfeet2000/duroc_hog + docker push -a wetfeet2000/essex_hog + docker push -a wetfeet2000/gottingen_hog + docker push -a wetfeet2000/hante_hog check-version: @if test ! 
$(VERSION); then echo "VERSION is undefined"; exit 1; fi diff --git a/README.md b/README.md index 49dc80f..db8ffce 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,45 @@ - +

+ +

+
-Rusty Hog is a secret scanner built in Rust for performance, and based on TruffleHog which is written -in Python. Rusty Hog provides the following binaries: +# rusty-hog +Rusty Hog is a set of secret scanners built for performance using [Rust-lang](https://www.rust-lang.org/). It is based on [TruffleHog](https://github.com/trufflesecurity/trufflehog). -* Ankamali Hog: Scans for secrets in a Google doc. -* Berkshire Hog: Scans for secrets in an S3 bucket. -* Choctaw Hog: Scans for secrets in a Git repository. -* Duroc Hog: Scans for secrets in a directory, file, and archive. -* Essex Hog: Scans for secrets in a Confluence wiki page. -* Gottingen Hog: Scans for secrets in a JIRA issue. -* Slack Hog: Scans for secrets in a Slack Channel. +The secret scanners use regular expressions to detect the presence of sensitive information, such as API keys, passwords and personal information. + +Rusty Hog provides a default set of regular expressions for secret scanning, but also accepts a JSON object which contains custom regular expressions. 
+ +## Contents +Rusty Hog provides the following binaries: +* Ankamali Hog: Google Docs secret scanner +* Berkshire Hog: S3 bucket secret scanner +* Choctaw Hog: Git repository secret scanner +* Duroc Hog: Filesystem (directory, file and archive) secret scanner +* Essex Hog: Confluence wiki page secret scanner +* Gottingen Hog: JIRA issue secret scanner +* Hante Hog: Slack Channel secret scanner ## Table of contents - - -- [Usage](#usage) - - [How to install using downloaded binaries](#how-to-install-using-downloaded-binaries) - - [How to run using DockerHub](#how-to-run-using-dockerhub) - - [How to build](#how-to-build) - - [How to build on Windows](#how-to-build-on-windows) - - [Anakamali Hog (GDoc Scanner) usage](#anakamali-hog-gdoc-scanner-usage) - - [Berkshire Hog (S3 Scanner - CLI) usage](#berkshire-hog-s3-scanner---cli-usage) - - [Berkshire Hog (S3 Scanner - Lambda) usage](#berkshire-hog-s3-scanner---lambda-usage) - - [Choctaw Hog (Git Scanner) usage](#choctaw-hog-git-scanner-usage) - - [Duroc Hog (file system scanner) usage](#duroc-hog-file-system-scanner-usage) +- Usage + - [Run via pre-built binaries](#run-via-pre-built-binaries) + - [Run via Docker](#run-via-docker) + - [Build instructions](#build-instructions) + - [Build instructions (lambda)](#build-instructions-lambda) + - [Build instructions (docs)](#build-instructions-docs) + - [Testing](#testing) + - [Linting](#linting) + - [Ankamali Hog (Google Docs scanner) usage](#ankamali-hog-google-docs-scanner-usage) + - [Berkshire Hog (S3 scanner - CLI) usage](#berkshire-hog-s3-scanner---cli-usage) + - [Berkshire Hog (S3 scanner - Lambda) usage](#berkshire-hog-s3-scanner---lambda-usage) + - [Choctaw Hog (Git scanner) usage](#choctaw-hog-git-scanner-usage) + - [Duroc Hog (Filesystem scanner) usage](#duroc-hog-filesystem-scanner-usage) - [Essex Hog (Confluence scanner) usage](#essex-hog-confluence-scanner-usage) - [Gottingen Hog (JIRA scanner) usage](#gottingen-hog-jira-scanner-usage) - - [Hante Hog (SLACK 
scanner) usage](#slack-hog-slack-scanner-usage) + - [Hante Hog (Slack scanner) usage](#hante-hog-slack-scanner-usage) - [Regex JSON file format](#regex-json-file-format) - [Allowlist JSON file format](#allowlist-json-file-format) -- [Project information](#project-information) +- Project information - [Open source license](#open-source-license) - [Support](#support) - [Community](#community) @@ -38,64 +48,93 @@ in Python. Rusty Hog provides the following binaries: - [Feature Roadmap](#feature-roadmap) - [What does the name mean?](#what-does-the-name-mean) - - -# Usage - -This project provides a set of scanners that use regular expressions to try and detect the presence of sensitive -information, such as API keys, passwords, and personal information. It includes a set of regular expressions by -default, but also accepts a JSON object containing your custom regular expressions. - -## How to install using downloaded binaries -Download and unzip the [latest ZIP](https://github.com/newrelic/rusty-hog/releases/) -on the releases tab. Then, run each binary with `-h` to see the usage. +## Run via pre-built binaries +Download via `curl`: +```shell script +curl -O https://github.com/newrelic/rusty-hog/releases/download/v1.0.11/rustyhogs-darwin-choctaw_hog-1.0.11.zip +``` +Or, download via `wget`: ```shell script wget https://github.com/newrelic/rusty-hog/releases/download/v1.0.11/rustyhogs-darwin-choctaw_hog-1.0.11.zip +``` + +Unzip binary and run the help command: +```shell script unzip rustyhogs-darwin-choctaw_hog-1.0.11.zip darwin_releases/choctaw_hog -h ``` -## How to run using DockerHub -Rusty Hog Docker images can be found at the authors personal DockerHub page [here](https://hub.docker.com/u/wetfeet2000) -A Docker Image is built for each Hog and for each release. So to use choctaw_hog you would run the following commands: +## Run via Docker +Docker images for Rusty Hog are available through [DockerHub](https://hub.docker.com/u/wetfeet2000). 
+Download and run choctaw_hog: +```shell script +docker pull wetfeet2000/choctaw_hog +docker run -it --rm wetfeet2000/choctaw_hog --help +``` + +Hogs can also be downloaded at a specific version (e.g. `v1.0.10`): ```shell script docker pull wetfeet2000/choctaw_hog:1.0.10 docker run -it --rm wetfeet2000/choctaw_hog:1.0.10 --help ``` -## How to build -- Ensure you have [Rust](https://www.rust-lang.org/learn/get-started) installed and on your path. -- Clone this repo, and then run `cargo build --release`. The binaries are located in `target/release`. -- To build and view HTML documents, run ```cargo doc --no-deps --open```. -- To run unit tests, run ```cargo test```. -- To cross-compile Berkshire Hog for the AWS Lambda environment, run the following commands and upload berkshire_lambda.zip to -your AWS Lambda dashboard: +## Build instructions +- Install [Rust-lang](https://www.rust-lang.org/learn/get-started) +- Ensure that Rust is defined in your path environment variable + +Clone this repository: +``` +git clone https://github.com/newrelic/rusty-hog.git +``` + +Build binary executables: ```shell script -docker run --rm -it -v "$(pwd)":/home/rust/src ekidd/rust-musl-builder cargo build --release -cp target/x86_64-unknown-linux-musl/release/berkshire_hog bootstrap -zip -j berkshire_lambda.zip bootstrap +cargo build --release ``` -## How to build on Windows -You will need to compile static OpenSSL binaries and tell Rust/Cargo where to find them: +Binary executables are located in `./target/release` + +## Build instructions (lambda) +Ensure that you have [Cross](https://github.com/cross-rs/cross) installed: +```shell script +cargo install cross --git https://github.com/cross-rs/cross ``` -mkdir \Tools -cd \Tools -git clone https://github.com/Microsoft/vcpkg.git -cd vcpkg -.\bootstrap-vcpkg.bat -.\vcpkg.exe install openssl:x64-windows-static - -$env:OPENSSL_DIR = 'C:\Tools\vcpkg\installed\x64-windows-static' -$env:OPENSSL_STATIC = 'Yes' 
-[System.Environment]::SetEnvironmentVariable('OPENSSL_DIR', $env:OPENSSL_DIR, [System.EnvironmentVariableTarget]::User) -[System.Environment]::SetEnvironmentVariable('OPENSSL_STATIC', $env:OPENSSL_STATIC, [System.EnvironmentVariableTarget]::User) + +Cross-compile Berkshire Hog for an AWS Lambda environment: +```shell script +cross build --release --target x86_64-unknown-linux-musl +cp target/x86_64-unknown-linux-musl/release/berkshire_hog_lambda bootstrap +zip berkshire_hog_lambda.zip bootstrap ``` -You can now follow the main build instructions listed above. -## Anakamali Hog (GDoc Scanner) usage +Deploy berkshire_hog_lambda.zip to AWS Lambda. + +## Build instructions (docs) +Build and view documentation: +```shell script +cargo doc --no-deps --open +``` + +## Testing +Run unit tests: +```shell script +cargo test --release +``` + +## Linting +Automatically format Rust code according to style guidelines: +```shell script +cargo fmt --all +``` + +Automatically lint Rust code to fix common mistakes: +```shell script +cargo clippy --fix +``` + +## Ankamali Hog (Google Docs scanner) usage ``` USAGE: ankamali_hog [FLAGS] [OPTIONS] @@ -121,7 +160,7 @@ ARGS: The ID of the Google drive file you want to scan ``` -## Berkshire Hog (S3 Scanner - CLI) usage +## Berkshire Hog (S3 scanner - CLI) usage ``` USAGE: berkshire_hog [FLAGS] [OPTIONS] @@ -151,7 +190,7 @@ ARGS: ``` -## Berkshire Hog (S3 Scanner - Lambda) usage +## Berkshire Hog (S3 scanner - Lambda) usage Berkshire Hog is currently designed to be used as a Lambda function. This is the basic data flow:
     ┌───────────┐              ┌───────┐     ┌────────────────┐     ┌────────────┐
@@ -166,7 +205,7 @@ In order to run Berkshire Hog this way, set up the following:
 2) Set up the SQS topic to accept events from S3, including IAM permissions.
 3) Run Berkshire Hog with IAM access to SQS and S3.
 
-## Choctaw Hog (Git Scanner) usage
+## Choctaw Hog (Git scanner) usage
 ```
 USAGE:
     choctaw_hog [FLAGS] [OPTIONS] 
@@ -196,7 +235,7 @@ ARGS:
         Sets the path (or URL) of the Git repo to scan. SSH links must include username (git@)
 ```
 
-## Duroc Hog (file system scanner) usage
+## Duroc Hog (Filesystem scanner) usage
 ```
 USAGE:
     duroc_hog [FLAGS] [OPTIONS] 
@@ -278,7 +317,7 @@ ARGS:
         The ID (e.g. PROJECT-123) of the Jira issue you want to scan
 ```
 
-## Hante Hog (SLACK scanner) usage
+## Hante Hog (Slack scanner) usage
 ```
 Slack secret scanner in Rust.
 
@@ -311,30 +350,28 @@ OPTIONS:
 ```
 
 ## Regex JSON file format
+The `--regex` option for each scanner allows users to provide the path of a customized JSON file containing regular expressions which match sensitive material.
 
-The regex option on scanners allows users to provide a path to their own JSON file of regular
-expressions that match sensitive material. Any provided file currently will replace, not append to, the default 
-regular expressions provided by SecretScanner. The expected format of the file is a single json object. 
+The provided JSON file will replace, not append to, the default regular expressions.
 
-The keys should be names for the type of secret each regex entry will detect, as the keys will be used for the reason 
-properties output by the scanner.
+The expected format of the provided JSON file is a single JSON object.
 
-Each value should be a string containing a valid [https://docs.rs/regex/1.3.9/regex/#syntax](regular expression for Rust) 
-that should match the type of secret described by its corresponding key.
+The keys represent the secret type that each value will detect, defined using Regex. The keys will be used for the reason property, which is output by the scanner.
 
-As of version 1.0.8, the Rusty Hog engine also supports objects as values for each secret. 
-The object can contain all of the following:
+Each value should be a string containing a valid [regular expression for Rust](https://docs.rs/regex/1.3.9/regex/#syntax), which matches the secret described by its corresponding key.
+
+As of version 1.0.8, the Rusty Hog engine also supports objects as values for each secret.
 
-- a pattern property with the matching regex expression (mandatory)
-- an entropy_filter property with a boolean value to enable entropy scanning for this information (mandatory)
+The object can contain all of the following:
+- a pattern property with the matching Regex (mandatory)
+- an entropy_filter property with a boolean value to enable entropy scanning for this secret (mandatory)
 - a threshold property to customize the entropy tolerance on a scale of 0 - 1 (optional, will adjust for old 1-8 format, default 0.6)
 - a keyspace property to indicate how many possible values are in the key, e.g. 16 for hex, 64 for base64, 128 for ASCII (optional, default 128)
 - a make_ascii_lowercase property to indicate whether Rust should perform .make_ascii_lowercase() on the key before calculating entropy (optional, default false)
 
-The higher the threshold, the more entropy is required in the secret to consider it a match.
+The higher the threshold, the more entropy is required in the secret for it to be considered a match.
 
 An example of this format is here:
-
 ```json
 {
     "Generic Secret": {
@@ -342,7 +379,7 @@ An example of this format is here:
         "entropy_filter": true,
         "threshold": "0.6"
     },
-    "Slack Token": { 
+    "Slack Token": {
         "pattern": "(xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})",
         "entropy_filter": true,
         "threshold": "0.6",
@@ -357,9 +394,7 @@ An example of this format is here:
 }
 ```
 
-
 As of version 1.0.11, the current default regex JSON used is as follows:
-
 ```json
 {
 	"Slack Token": "(xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})",
@@ -439,21 +474,19 @@ As of version 1.0.11, the current default regex JSON used is as follows:
 ```
 
 ## Allowlist JSON file format
+You can provide an allowlist to each secret scanner. An allowlist lets you specify a list of regular expressions for each pattern that will be ignored by the secret scanner.
 
-Scanners provide an allowlist feature. This allows you to specify a list of regular expressions for each pattern that
-will be ignored by the scanner. You can now optionally supply a list of regular expressions that are evaluated against 
-the file path as well. 
+You can also supply an optional list of regular expressions which are evaluated against the file path.
 
-The format for this allowlist file should be a single json object. Each key in the allowlist should match a key in the 
-regex json, and the value can be one of two things:
-1) An array of strings that are exceptions for that regex pattern. For example:
-2) An object with at least one key (patterns) and optionally a second key (paths). 
+The format for this allowlist file should be a single JSON object.
 
-In addition, you can specify the key `` which is evaluated against all patterns. 
-
-The following is the default allowlist included in all scans:
+Each key in the allowlist should match a key in the Regex json. The value can be one of the following:
+- An array of strings that are exceptions for that Regex
+- An object with at least one key (patterns) and optionally a second key (paths)
 
+In addition, you can specify the `` key, which is evaluated against all patterns.
 
+The following is the default allowlist included in all scans:
 ```json
 {
 	"Email address": {
@@ -509,47 +542,41 @@ The following is the default allowlist included in all scans:
 }
 ```
 
-Be aware that in these are strings, not regex expressions, and the keys for this allowlist have to a key in the regex json.
+Be aware that the values in this JSON object are strings, not regular expressions.
+
+The keys for this allowlist have to be a key in the Regex JSON.
+
 Keys are case-sensitive.
 
-# Project information
 ## Open source license
-
 This project is distributed under the [Apache 2 license](LICENSE).
 
 ## Support
-
 New Relic has open-sourced this project. This project is provided AS-IS WITHOUT WARRANTY OR SUPPORT, although you can report issues and contribute to the project here on GitHub.
 
 _Please do not report issues with this software to New Relic Global Technical Support._
 
 ## Community
-
 New Relic hosts and moderates an online forum where customers can interact with New Relic employees as well as other customers to get help and share best practices. Like all official New Relic open source projects, there's a related Community topic in the New Relic Explorer's Hub. You can find this project's topic/threads here:
 
 https://discuss.newrelic.com/t/rusty-hog-multi-platform-secret-key-scanner/90117
 
 ## Issues / enhancement requests
-
 Submit issues and enhancement requests in the [Issues tab of this repository](../../issues). Please search for and review the existing open issues before submitting a new issue.
 
 ## Contributing
-
 Contributions are welcome (and if you submit a enhancement request, expect to be invited to contribute it yourself). Please review our [Contributors Guide](CONTRIBUTING.md).
 
 Keep in mind that when you submit your pull request, you'll need to sign the CLA via the click-through using CLA-Assistant. If you'd like to execute our corporate CLA, or if you have any questions, please drop us an email at opensource@newrelic.com.
 
 
 ## Feature Roadmap
-  
 - 1.1: Enterprise features
     - [ ] Support config files (instead of command line args)
     - [ ] Support environment variables instead of CLI args
     - [ ] Multi-threading
     - [ ] Better context detection and false positive filtering (GitHound, machine learning)
     - [ ] Use Rusoto instead of s3-rust
-    - [x] Add JIRA scanner
-    - [x] Add file-system & archive scanner
     - [ ] Use Rust features to reduce compilation dependencies?
 
 - 1.2: Integration with larger scripts and UIs
@@ -560,10 +587,9 @@ Keep in mind that when you submit your pull request, you'll need to sign the CLA
     - [ ] Agent/manager model
     - [ ] Scheduler process (blocked by save state support)
 
-
 ## What does the name mean?
-TruffleHog is considered the de facto standard / original secret scanner. I have been
-building a suite of secret scanning tools for various platforms based on TruffleHog
-and needed a naming scheme, so I started at the top of Wikipedia's
-[list of pig breeds](https://en.wikipedia.org/wiki/List_of_pig_breeds).
-Thus each tool name is a breed of pig starting at "A" and working up.
+[TruffleHog](https://github.com/trufflesecurity/trufflehog) is considered the de facto standard / original secret scanner.
+
+We have built a suite of secret scanning tools for various platforms based on TruffleHog and needed a naming schema.
+
+The naming schema is inspired by the [list of pig breeds](https://en.wikipedia.org/wiki/List_of_pig_breeds) from Wikipedia. Each tool name is a breed of pig starting at "A" and working down alphabetically.
diff --git a/crates/rusty-hog-scanner/src/lib.rs b/crates/rusty-hog-scanner/src/lib.rs
index 3dc998a..1a87123 100644
--- a/crates/rusty-hog-scanner/src/lib.rs
+++ b/crates/rusty-hog-scanner/src/lib.rs
@@ -1048,12 +1048,12 @@ impl PartialEq for SecretScanner {
             && self.regex_map.keys().eq(other.regex_map.keys())
             && self.pretty_print == other.pretty_print
             && match self.output_path.as_ref() {
-            None => other.output_path.is_none(),
-            Some(s) => match other.output_path.as_ref() {
-                None => false,
-                Some(t) => *s == *t,
-            },
-        }
+                None => other.output_path.is_none(),
+                Some(s) => match other.output_path.as_ref() {
+                    None => false,
+                    Some(t) => *s == *t,
+                },
+            }
     }
 }
 
@@ -1109,7 +1109,7 @@ mod tests {
             not_so_secret_but_has_the_word_secret_and_is_long
         "#,
         )
-            .into_bytes();
+        .into_bytes();
         let output = SecretScanner::entropy_findings(test_string.as_slice(), 0.6);
         // println!("{:?}", output);
         assert_eq!(output.len(), 1);
@@ -1164,7 +1164,7 @@ mod tests {
             not_so_secret_but_has_the_word_secret_and_is_long
         "#,
         )
-            .into_bytes();
+        .into_bytes();
         let mut findings: Vec<(String, String)> = Vec::new();
         // Main loop - split the data based on newlines, then run get_matches() on each line,
         // then make a list of findings in output
@@ -1209,7 +1209,7 @@ mod tests {
             @
         "#,
         )
-            .into_bytes();
+        .into_bytes();
         let mut findings: Vec<(String, String)> = Vec::new();
         // Main loop - split the data based on newlines, then run get_matches() on each line,
         // then make a list of findings in output
diff --git a/src/aws_scanning.rs b/src/aws_scanning.rs
index 8acc035..441b088 100644
--- a/src/aws_scanning.rs
+++ b/src/aws_scanning.rs
@@ -53,11 +53,11 @@
 use encoding::all::ASCII;
 use encoding::{DecoderTrap, Encoding};
 use log::{self, error, trace};
+use rusty_hog_scanner::SecretScanner;
 use s3::bucket::Bucket;
 use serde_derive::{Deserialize, Serialize};
 use simple_error::SimpleError;
 use std::str;
-use rusty_hog_scanner::SecretScanner;
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Default)]
 /// `serde_json` object that represents a single found secret - finding
@@ -129,7 +129,7 @@ impl S3Scanner {
                 }
                 if !strings_found.is_empty() {
                     let new_line_string = ASCII
-                        .decode(&new_line, DecoderTrap::Ignore)
+                        .decode(new_line, DecoderTrap::Ignore)
                         .unwrap_or_else(|_| "".parse().unwrap());
                     output.push(S3Finding {
                         diff: new_line_string,
diff --git a/src/bin/ankamali_hog.rs b/src/bin/ankamali_hog.rs
index 1ed796b..62b420d 100644
--- a/src/bin/ankamali_hog.rs
+++ b/src/bin/ankamali_hog.rs
@@ -33,11 +33,11 @@ extern crate yup_oauth2 as oauth2;
 use clap::ArgMatches;
 use drive3::DriveHub;
 use log::{self, error, info};
+use oauth2::{InstalledFlowAuthenticator, InstalledFlowReturnMethod};
+use rusty_hog_scanner::{SecretScanner, SecretScannerBuilder};
+use rusty_hogs::google_scanning::{GDriveFileInfo, GDriveScanner};
 use simple_error::SimpleError;
 use std::path::Path;
-use rusty_hogs::google_scanning::{GDriveFileInfo, GDriveScanner};
-use rusty_hog_scanner::{SecretScanner, SecretScannerBuilder};
-use oauth2::{InstalledFlowAuthenticator, InstalledFlowReturnMethod};
 
 /// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
 #[tokio::main]
@@ -67,7 +67,8 @@ async fn main() {
 
 /// Main logic contained here. Get the CLI variables, setup OAuth, setup GDriveScanner and output
 /// the results.
-async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {    // Set logging
+async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
+    // Set logging
     SecretScanner::set_logging(arg_matches.occurrences_of("VERBOSE"));
 
     // Initialize some variables
@@ -90,7 +91,10 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {    //
         .build()
         .await
         .expect("failed to create authenticator (try deleting temp_token and restarting)");
-    let hub = DriveHub::new(hyper::Client::builder().build(hyper_rustls::HttpsConnector::with_native_roots()), auth);
+    let hub = DriveHub::new(
+        hyper::Client::builder().build(hyper_rustls::HttpsConnector::with_native_roots()),
+        auth,
+    );
 
     // get some initial info about the file
     let gdriveinfo = GDriveFileInfo::new(file_id, &hub).await.unwrap();
diff --git a/src/bin/berkshire_hog.rs b/src/bin/berkshire_hog.rs
index e70a0f3..b333227 100644
--- a/src/bin/berkshire_hog.rs
+++ b/src/bin/berkshire_hog.rs
@@ -38,8 +38,8 @@ use simple_error::{require_with, try_with};
 use std::str;
 use url::Url;
 
-use rusty_hogs::aws_scanning::{S3Finding, S3Scanner};
 use rusty_hog_scanner::{SecretScanner, SecretScannerBuilder};
+use rusty_hogs::aws_scanning::{S3Finding, S3Scanner};
 use std::collections::HashSet;
 
 /// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
diff --git a/src/bin/berkshire_hog_lambda.rs b/src/bin/berkshire_hog_lambda.rs
index b5c8245..ac03574 100644
--- a/src/bin/berkshire_hog_lambda.rs
+++ b/src/bin/berkshire_hog_lambda.rs
@@ -18,8 +18,8 @@ extern crate s3;
 
 use lambda_runtime::{handler_fn, Context, Error};
 use log::{self, warn, LevelFilter};
-use rusty_hogs::aws_scanning::{S3Finding, S3Scanner};
 use rusty_hog_scanner::SecretScannerBuilder;
+use rusty_hogs::aws_scanning::{S3Finding, S3Scanner};
 use s3::bucket::Bucket;
 use s3::creds::Credentials;
 use s3::region::Region;
@@ -160,7 +160,7 @@ async fn my_handler(event: CustomEvent, _: Context) -> Result Result<(), SimpleError> {
     // Do the scan
     let git_scanner = GitScanner::new_from_scanner(secret_scanner).init_git_repo(
         source_path,
-        &dest_dir_path,
+        dest_dir_path,
         sshkeypath,
         sshkeyphrase,
         httpsuser,
diff --git a/src/bin/duroc_hog.rs b/src/bin/duroc_hog.rs
index ec90e7d..5b2d140 100644
--- a/src/bin/duroc_hog.rs
+++ b/src/bin/duroc_hog.rs
@@ -154,7 +154,7 @@ fn scan_dir(
 
     let scanning_closure = |file_path: &Path| {
         let f = File::open(file_path).unwrap();
-        let mut inner_findings = scan_file(file_path, &ss, f, "", unzip);
+        let mut inner_findings = scan_file(file_path, ss, f, "", unzip);
         for d in inner_findings.drain() {
             output.insert(d);
         }
@@ -175,7 +175,7 @@ where
 {
     for entry in WalkDir::new(fspath).into_iter().filter_map(|e| e.ok()) {
         if entry.file_type().is_file() && PathBuf::from(entry.path()).clean() != output_file {
-            closure(&entry.path());
+            closure(entry.path());
         }
     }
 }
@@ -322,7 +322,7 @@ fn scan_bytes(input: Vec, ss: &SecretScanner, path: String) -> HashSet".parse().unwrap());
                 findings.insert(FileFinding {
                     diff: new_line_string,
@@ -355,19 +355,20 @@ mod tests {
 
     fn write_temp_file(dir: &TempDir, filename: &str, contents: &str) {
         let file_path = dir.path().join(filename);
-        let mut tmp_file = File::create(&file_path).unwrap();
+        let mut tmp_file = File::create(file_path).unwrap();
         write!(tmp_file, "{}", contents).unwrap();
     }
 
     fn read_temp_file(dir: &TempDir, filename: &str) -> String {
         let mut contents = String::new();
         let file_path = dir.path().join(filename);
-        let mut file_handle = File::open(&file_path).unwrap();
+        let mut file_handle = File::open(file_path).unwrap();
         file_handle.read_to_string(&mut contents).unwrap();
         contents
     }
 
     #[test]
+    #[cfg(not(target_os = "windows"))]
     fn does_not_scan_output_file() {
         let temp_dir = TempDir::new().unwrap();
 
diff --git a/src/bin/essex_hog.rs b/src/bin/essex_hog.rs
index a59d196..506382d 100644
--- a/src/bin/essex_hog.rs
+++ b/src/bin/essex_hog.rs
@@ -141,7 +141,7 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
     };
 
     // fetch the content of confluence page along with the comments
-    let page = get_page(hyper_client, auth_string, &base_url, &page_id).await;
+    let page = get_page(hyper_client, auth_string, base_url, page_id).await;
 
     // find secrets in page body and comments
     let mut content = page.body;
@@ -240,7 +240,7 @@ where
     let r = req_builder.body(Body::empty()).unwrap();
     let resp = hyper_client.request(r).await.unwrap();
     debug!("sending request to {}", full_url);
-    let status = resp.status().clone();
+    let status = resp.status();
     debug!("Response: {:?}", status);
     let data = body::to_bytes(resp.into_body()).await.unwrap();
     let data_vec: Vec<u8> = data.to_vec();
diff --git a/src/bin/gottingen_hog.rs b/src/bin/gottingen_hog.rs
index 6de7d8b..9049beb 100644
--- a/src/bin/gottingen_hog.rs
+++ b/src/bin/gottingen_hog.rs
@@ -207,7 +207,7 @@ where
     let r = req_builder.body(Body::empty()).unwrap();
     let resp = hyper_client.request(r).await.unwrap();
     debug!("sending request to {}", full_url);
-    let status = resp.status().clone();
+    let status = resp.status();
     debug!("Response: {:?}", status);
     let data = body::to_bytes(resp.into_body()).await.unwrap();
     let data_vec: Vec<u8> = data.to_vec();
diff --git a/src/bin/hante_hog.rs b/src/bin/hante_hog.rs
index 9ff0b3b..7acb471 100644
--- a/src/bin/hante_hog.rs
+++ b/src/bin/hante_hog.rs
@@ -106,20 +106,16 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
         .value_of("CHANNELID") // TODO validate the format somehow
         .unwrap();
     // Reading the Slack URL from the command line
-    let base_url_input = arg_matches
-        .value_of("SLACKURL")
-        .unwrap();
+    let base_url_input = arg_matches.value_of("SLACKURL").unwrap();
     // Parse an absolute URL from a string.
     let base_url_as_url = Url::parse(base_url_input).unwrap();
     let base_url = base_url_as_url.as_str();
 
     // Reading the latest timestamp from the command line
-    let latest_input = arg_matches
-        .value_of("LATEST");
+    let latest_input = arg_matches.value_of("LATEST");
 
     // Reading the latest timestamp from the command line
-    let oldest_input = arg_matches
-        .value_of("OLDEST");
+    let oldest_input = arg_matches.value_of("OLDEST");
 
     // Still inside `async fn main`...
     let https = hyper_rustls::HttpsConnector::with_native_roots();
@@ -130,10 +126,20 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
 
     // Building URL to request conversation history for the channel
     // TODO: Construct the URL using a URL library to avoid weird input issues?
-    let full_url = format!("{}/api/conversations.history?channel={}", base_url, channel_id);
+    let full_url = format!(
+        "{}/api/conversations.history?channel={}",
+        base_url, channel_id
+    );
 
     // Retrieving the history of the channel
-    let json_results_array = get_channel_history_json(hyper_client, auth_string, &full_url, latest_input, oldest_input).await;
+    let json_results_array = get_channel_history_json(
+        hyper_client,
+        auth_string,
+        &full_url,
+        latest_input,
+        oldest_input,
+    )
+    .await;
     // WARNING: This method requires storing ALL the slack channel history JSON in memory at once
     // TODO: Re-write these methods to scan each JSON API request - to conserve memory usage
 
@@ -142,11 +148,7 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
 
     for json_results in json_results_array.iter() {
         // Parsing the messages as an array
-        let messages = json_results
-            .get("messages")
-            .unwrap()
-            .as_array()
-            .unwrap();
+        let messages = json_results.get("messages").unwrap().as_array().unwrap();
 
         // find secrets in each message
         for message in messages {
@@ -155,12 +157,21 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
             let location = format!(
                 "message type {} by {} on {}",
                 message.get("type").unwrap(),
-                message.get("user").unwrap_or(&Value::String("".to_string())),
+                message
+                    .get("user")
+                    .unwrap_or(&Value::String("".to_string())),
                 message.get("ts").unwrap()
             );
             let message_text = message.get("text").unwrap().as_str().unwrap().as_bytes();
 
-            let message_findings = get_findings(&secret_scanner, base_url, channel_id, ts, message_text, location);
+            let message_findings = get_findings(
+                &secret_scanner,
+                base_url,
+                channel_id,
+                ts,
+                message_text,
+                location,
+            );
             secrets.extend(message_findings);
         }
     }
@@ -177,7 +188,6 @@ async fn run<'b>(arg_matches: ArgMatches<'b>) -> Result<(), SimpleError> {
     }
 }
 
-
 // TODO: move this to a separate file
 /// get_channel_history_json uses a hyper::client object to perform a POST on the full_url and return parsed serde JSON data
 async fn get_channel_history_json<'a, C>(
@@ -222,7 +232,7 @@ where
 
         debug!("sending request to {}", full_url_mod.clone());
 
-        let status = resp.status().clone();
+        let status = resp.status();
         debug!("Response: {:?}", status);
 
         let data = body::to_bytes(resp.into_body()).await.unwrap();
@@ -231,7 +241,9 @@ where
         if status != StatusCode::OK {
             panic!(
                 "Request to {} failed with code {:?}: {}",
-                full_url_mod.clone(), status, response_body
+                full_url_mod.clone(),
+                status,
+                response_body
             )
         }
 
@@ -241,16 +253,25 @@ where
         if !ok {
             panic!(
                 "Request to {} failed with error {:?}: {}",
-                full_url_mod.clone(), json_results["error"], response_body
+                full_url_mod.clone(),
+                json_results["error"],
+                response_body
             )
         }
         has_more = json_results.get("has_more").unwrap().as_bool().unwrap();
-        if has_more { // TODO: Cleanup weird borrowing issues?
-            let rm = json_results.get("response_metadata").unwrap().as_object().unwrap().clone();
-            cursor = Some(String::from(rm.get("next_cursor").unwrap().as_str().unwrap()));
+        if has_more {
+            // TODO: Cleanup weird borrowing issues?
+            let rm = json_results
+                .get("response_metadata")
+                .unwrap()
+                .as_object()
+                .unwrap()
+                .clone();
+            cursor = Some(String::from(
+                rm.get("next_cursor").unwrap().as_str().unwrap(),
+            ));
         }
         output.push(json_results);
-
     }
     output
 }
@@ -265,12 +286,11 @@ fn get_findings(
     description: &[u8],
     location: String,
 ) -> Vec<SlackFinding> {
-
     let lines = description.split(|&x| (x as char) == '\n');
     let mut secrets: Vec<SlackFinding> = Vec::new();
 
     // Building web links for Slack messages
-    // https://<workspace>.slack.com/archives/<channel_id>/p<message_id> 
+    // https://<workspace>.slack.com/archives/<channel_id>/p<message_id>
     let msg_id = str::replace(ts, ".", "");
     let web_link = format!("{}/archives/{}/p{}", base_url, channel_id, msg_id);
 
@@ -278,7 +298,8 @@ fn get_findings(
     for new_line in lines {
         debug!("{:?}", std::str::from_utf8(new_line));
         // Builds a BTreeMap of the findings
-        let matches_map: BTreeMap<String, Vec<RustyHogMatch>> = secret_scanner.matches_entropy(new_line);
+        let matches_map: BTreeMap<String, Vec<RustyHogMatch>> =
+            secret_scanner.matches_entropy(new_line);
 
         // Iterate over the findings and add them to the list of findings to return
         for (reason, match_iterator) in matches_map {
diff --git a/src/git_scanning.rs b/src/git_scanning.rs
index 5e428ae..5cba7d6 100644
--- a/src/git_scanning.rs
+++ b/src/git_scanning.rs
@@ -49,12 +49,12 @@ use encoding::{DecoderTrap, Encoding};
 use git2::{Commit, DiffFormat, Tree};
 use git2::{DiffOptions, Repository, Time};
 use log::{self, debug, info};
+use rusty_hog_scanner::{RustyHogMatch, SecretScanner};
 use serde::{Deserialize, Serialize};
 use std::collections::{BTreeMap, HashSet};
 use std::hash::{Hash, Hasher};
 use std::path::Path;
 use std::{fmt, str};
-use rusty_hog_scanner::{RustyHogMatch, SecretScanner};
 use url::{ParseError, Url};
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Default)]
@@ -181,10 +181,7 @@ impl GitScanner {
                 Some(pc) => pc.id().to_string(),
                 None => String::from("None"),
             };
-            let a: Option<Tree> = match parent_commit_option {
-                Some(pc) => Some(pc.tree().unwrap()),
-                _ => None,
-            };
+            let a: Option<Tree> = parent_commit_option.map(|pc| pc.tree().unwrap());
             let b = commit.tree().unwrap();
             let mut diffopts = DiffOptions::new();
             diffopts.force_text(true);
@@ -241,7 +238,7 @@ impl GitScanner {
                                 commit_hash: commit.id().to_string(),
                                 commit: commit.message().unwrap().to_string(),
                                 diff: ASCII
-                                    .decode(&new_line, DecoderTrap::Ignore)
+                                    .decode(new_line, DecoderTrap::Ignore)
                                     .unwrap_or_else(|_| "<binary>".parse().unwrap()),
                                 date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0)
                                     .to_string(),
diff --git a/src/google_scanning.rs b/src/google_scanning.rs
index e33d248..fae62e3 100644
--- a/src/google_scanning.rs
+++ b/src/google_scanning.rs
@@ -80,10 +80,10 @@ use encoding::all::ASCII;
 use encoding::{DecoderTrap, Encoding};
 use google_drive3::api::Scope;
 use hyper::body;
+use rusty_hog_scanner::SecretScanner;
 use serde_derive::{Deserialize, Serialize};
 use simple_error::SimpleError;
 use std::collections::HashSet;
-use rusty_hog_scanner::SecretScanner;
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Default)]
 /// `serde_json` object that represents a single found secret - finding
@@ -241,7 +241,7 @@ impl GDriveScanner {
         // main loop - search each line for secrets, output a list of GDriveFinding objects
         let mut findings: HashSet<GDriveFinding> = HashSet::new();
         for new_line in lines {
-            let matches_map = self.secret_scanner.matches_entropy(&new_line);
+            let matches_map = self.secret_scanner.matches_entropy(new_line);
             for (reason, match_iterator) in matches_map {
                 let mut secrets: Vec<String> = Vec::new();
                 for matchobj in match_iterator {
@@ -257,7 +257,7 @@ impl GDriveScanner {
                 if !secrets.is_empty() {
                     findings.insert(GDriveFinding {
                         diff: ASCII
-                            .decode(&new_line, DecoderTrap::Ignore)
+                            .decode(new_line, DecoderTrap::Ignore)
                             .unwrap_or_else(|_| "<binary>".parse().unwrap()),
                         date: gdrivefile.modified_time.clone(),
                         strings_found: secrets.clone(),

From 9b42730a99b165b3fa8d347d6fda07808e7230ed Mon Sep 17 00:00:00 2001
From: noahmmcgivern 
Date: Thu, 21 Dec 2023 10:22:24 -0500
Subject: [PATCH 2/2] Add no_run attribute to git_scanning documentation

---
 src/git_scanning.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/git_scanning.rs b/src/git_scanning.rs
index 5cba7d6..97e93b7 100644
--- a/src/git_scanning.rs
+++ b/src/git_scanning.rs
@@ -29,7 +29,7 @@
 //! `HashSet` of findings. In this example we're specifying a specific commit to stop scanning at
 //! (801360e) so we can have a reliable result.
 //!
-//! ```
+//! ```no_run
 //! use rusty_hog_scanner::SecretScannerBuilder;
 //! use rusty_hogs::git_scanning::{GitScanner, GitFinding};
 //! use std::collections::HashSet;