diff --git a/.fileignore b/.fileignore
new file mode 100644
index 000000000000..c7e9e3cb0872
--- /dev/null
+++ b/.fileignore
@@ -0,0 +1,5 @@
+# Use UNIX style pattern matching to select files for truffleHog to ignore. Use slashes for directory matching.
+# Examples:
+# *.md
+# LICENSE
+# temp/*
\ No newline at end of file
diff --git a/truffleHog/truffleHog.py b/truffleHog/truffleHog.py
index 7c01379eabca..4f790e15c364 100644
--- a/truffleHog/truffleHog.py
+++ b/truffleHog/truffleHog.py
@@ -10,18 +10,32 @@
 import os
 import json
 import stat
+import fnmatch
 from git import Repo
 
 def main():
     parser = argparse.ArgumentParser(description='Find secrets hidden in the depths of git.')
     parser.add_argument('--json', dest="output_json", action="store_true", help="Output in JSON")
     parser.add_argument('git_url', type=str, help='URL for secret searching')
+
+    # Load ignore patterns from .fileignore when it exists; blank lines and
+    # lines starting with "#" are skipped.
+    try:
+        with open('.fileignore', 'r') as f:
+            for line in f:
+                pattern = line.rstrip()
+                if pattern and not pattern.startswith("#"):
+                    file_filter_patterns.append(pattern)
+    except (IOError, OSError, UnicodeError):
+        # The ignore file is optional: a missing or unreadable file must not
+        # abort the scan.
+        pass
+
     args = parser.parse_args()
     output = find_strings(args.git_url, args.output_json)
     project_path = output["project_path"]
     shutil.rmtree(project_path, onerror=del_rw)
 
-
 BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
 HEX_CHARS = "1234567890abcdefABCDEF"
 
@@ -29,6 +43,23 @@ def del_rw(action, name, exc):
     os.chmod(name, stat.S_IWRITE)
     os.remove(name)
 
+file_filter_patterns = []
+
+def pathfilter(path):
+    """
+    Return path, or None if it matches a pattern in file_filter_patterns.
+    Patterns containing a slash or backslash are matched against the whole
+    path; all other patterns are matched against the file's base name only.
+    """
+    for pat in file_filter_patterns:
+        if ("/" in pat) or ("\\" in pat):
+            if fnmatch.fnmatch(path, pat):
+                return None
+        else:
+            if fnmatch.fnmatch(os.path.basename(path), pat):
+                return None
+    return path
+
 def shannon_entropy(data, iterator):
     """
     Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html
@@ -99,6 +130,11 @@ def find_strings(git_url, printJson=False):
                 diff = prev_commit.diff(curr_commit, create_patch=True)
                 for blob in diff:
                     #print i.a_blob.data_stream.read()
+                    # Use a_path when it is set, otherwise b_path, and skip
+                    # the blob when pathfilter() rejects that path.
+                    path = blob.a_path or blob.b_path
+                    if path and not pathfilter(path):
+                        continue
                     printableDiff = blob.diff.decode('utf-8', errors='replace')
                     if printableDiff.startswith("Binary files"):
                         continue
@@ -121,15 +157,17 @@ def find_strings(git_url, printJson=False):
                     if len(stringsFound) > 0:
                         commit_time = datetime.datetime.fromtimestamp(prev_commit.committed_date).strftime('%Y-%m-%d %H:%M:%S')
                         entropicDiff = {}
+                        entropicDiff['file'] = str(path)
                         entropicDiff['date'] = commit_time
                         entropicDiff['branch'] = branch_name
                         entropicDiff['commit'] = prev_commit.message
-                        entropicDiff['diff'] = blob.diff.decode('utf-8', errors='replace')
+                        entropicDiff['diff'] = blob.diff.decode('utf-8', errors='replace')
                         entropicDiff['stringsFound'] = stringsFound
                         output["entropicDiffs"].append(entropicDiff)
                         if printJson:
                             print(json.dumps(output, sort_keys=True, indent=4))
                         else:
+                            print(bcolors.OKGREEN + "File: " + str(path) + bcolors.ENDC)
                             print(bcolors.OKGREEN + "Date: " + commit_time + bcolors.ENDC)
                             print(bcolors.OKGREEN + "Branch: " + branch_name + bcolors.ENDC)
                             print(bcolors.OKGREEN + "Commit: " + prev_commit.message + bcolors.ENDC)