diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..dba132dc
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 88
+extend-ignore = E203, E501, W503
diff --git a/.github/workflows/style_automation.yml b/.github/workflows/style_automation.yml
new file mode 100644
index 00000000..eb70b3b4
--- /dev/null
+++ b/.github/workflows/style_automation.yml
@@ -0,0 +1,27 @@
+---
+name: Style Automation
+on:
+  push:
+  pull_request:
+    branches:
+      - master
+jobs:
+  style:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black mypy flake8 isort
+
+      - name: Code quality checks
+        run: |
+          python -m black --extend-exclude .*_pb2.*\.py --check --diff src/python
+          python -m flake8 --exclude src/python/build/,src/python/strelka/proto/ src/python
+          python -m isort --extend-skip-glob *_pb2*.py --profile black --check-only src/python
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..aa635bf5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,33 @@
+---
+repos:
+  - repo: https://github.com/psf/black
+    rev: "22.6.0"
+    hooks:
+      - id: black
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+      - id: check-added-large-files
+      - id: check-merge-conflict
+      - id: mixed-line-ending
+      - id: no-commit-to-branch
+        args:
+          - -b main
+  - repo: https://github.com/PyCQA/flake8
+    rev: "4.0.1"
+    hooks:
+      - id: flake8
+  - repo: https://github.com/PyCQA/isort
+    rev: "5.10.1"
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--filter-files"]
+# - repo: https://github.com/pre-commit/mirrors-mypy
+#   rev: v0.961
+#   hooks:
+#     - id: mypy
+#       additional_dependencies:
+#         - types-requests
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f316bc37..46bb0d44 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,6 +14,46 @@ Bugs should be submitted as issues using the bug report template.
 Enhancements should be submitted as issues using the feature request template.
 
 ## Development Environment
+
+Clone the repo
+
+```
+git clone https://github.com/target/strelka.git
+```
+
+Python should be set up to use a virtualenv.
+
+```
+cd strelka/
+python -m venv env
+```
+
+Activate the virtualenv
+
+```
+source env/bin/activate
+```
+
+Install build requirements
+
+```
+cd src/python/
+pip install -r requirements.txt
+```
+
+Install strelka
+
+```
+python setup.py install
+```
+
+Install pre-commit hooks
+
+```
+pre-commit install
+```
+
+
 Development builds can be tested using the default docker-compose.yaml file (`build/docker-compose.yaml`). To bring the project up with docker-compose, use the following command as a template:
 ```bash
 docker-compose -f build/docker-compose.yaml --project-name strelka up
 ```
@@ -76,8 +116,11 @@ tests_configuration/test_taste.py ..............................................
 ```
 
 ## Style Guides
+
 ### Python
-Python code should attempt to adhere as closely to [PEP8](https://www.python.org/dev/peps/pep-0008/) as possible. We may ask authors to refactor code for better PEP8 compliance, but we do not enforce 100% compliance.
+Python code should attempt to adhere as closely to [PEP8](https://www.python.org/dev/peps/pep-0008/) as possible.
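+
+For example (a minimal local check, assuming the tooling above is installed in the virtualenv), the same checks that the CI workflow runs can be executed from the repository root:
+
+```bash
+# Run all configured pre-commit hooks against the entire tree
+pre-commit run --all-files
+
+# Or invoke the tools individually (the CI workflow also passes
+# exclude flags for generated protobuf files)
+python -m black --check src/python
+python -m isort --profile black --check-only src/python
+python -m flake8 src/python
+```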
+
+Conformance is enforced using `black`, `isort`, and `flake8` in pre-commit hooks and CI/CD actions (a `mypy` hook is included in the pre-commit config but currently commented out). Pushes and PRs may be automatically rejected due to non-conformance. Review the errors and warnings emitted by these tools for guidance on bringing code into compliance.
 
 ### Scanners
 * Write event data in snake_case JSON format
diff --git a/build/python/backend/requirements.txt b/build/python/backend/requirements.txt
index d2380517..0ff359ce 100644
--- a/build/python/backend/requirements.txt
+++ b/build/python/backend/requirements.txt
@@ -28,6 +28,7 @@ opencv-python==4.6.0.66
 openpyxl==3.0.9
 pefile==2022.5.30
 pgpdump3==1.5.2
+pre-commit==3.0.1
 py-tlsh==4.7.2
 pycdlib==1.13.0
 pyelftools==0.28
diff --git a/src/python/bin/strelka-backend b/src/python/bin/strelka-backend
index a0aa86eb..b8864da3 100644
--- a/src/python/bin/strelka-backend
+++ b/src/python/bin/strelka-backend
@@ -8,10 +8,10 @@ import argparse
 import logging.config
 import os
 import sys
+
 import redis
 import yaml
-
 from strelka import strelka
diff --git a/src/python/bin/strelka-mmrpc b/src/python/bin/strelka-mmrpc
index 0fae4706..25fe1be4 100644
--- a/src/python/bin/strelka-mmrpc
+++ b/src/python/bin/strelka-mmrpc
@@ -1,14 +1,12 @@
 #! /usr/bin/env python3
 import argparse
-from concurrent import futures
 import json
 import time
+from concurrent import futures
 
 import grpc
 from mmbot import mmbot
-
-from strelka.proto import mmbot_pb2
-from strelka.proto import mmbot_pb2_grpc
+from strelka.proto import mmbot_pb2, mmbot_pb2_grpc
 
 
 class MmbotServicer(mmbot_pb2_grpc.MmbotServicer):
diff --git a/src/python/requirements.txt b/src/python/requirements.txt
index d2380517..0ff359ce 100644
--- a/src/python/requirements.txt
+++ b/src/python/requirements.txt
@@ -28,6 +28,7 @@ opencv-python==4.6.0.66
 openpyxl==3.0.9
 pefile==2022.5.30
 pgpdump3==1.5.2
+pre-commit==3.0.1
 py-tlsh==4.7.2
 pycdlib==1.13.0
 pyelftools==0.28
diff --git a/src/python/setup.py b/src/python/setup.py
index b9096034..52368df2 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -2,16 +2,16 @@ import setuptools
 
 setuptools.setup(
-    name='strelka',
-    author='Target Brands, Inc.',
-    description='strelka: container-based file analysis at scale',
-    license='Apache 2.0',
+    name="strelka",
+    author="Target Brands, Inc.",
+    description="strelka: container-based file analysis at scale",
+    license="Apache 2.0",
     packages=setuptools.find_packages(),
-    scripts=['bin/strelka-backend', 'bin/strelka-mmrpc'],
+    scripts=["bin/strelka-backend", "bin/strelka-mmrpc"],
     zip_safe=False,
     entry_points={
-        'console_scripts': [
-            'strelka = strelka.__main__:main',
+        "console_scripts": [
+            "strelka = strelka.__main__:main",
         ]
-    }
+    },
 )
diff --git a/src/python/strelka/__main__.py b/src/python/strelka/__main__.py
index 82aaea67..1eb5a3c0 100644
--- a/src/python/strelka/__main__.py
+++ b/src/python/strelka/__main__.py
@@ -3,10 +3,10 @@ import os
 import sys
 import time
+from importlib.resources import files
 
 import yaml
-
-from importlib.resources import files
 from strelka import strelka
@@ -24,24 +24,24 @@ def main():
 
     if os.path.exists("/etc/strelka/backend.yaml"):
         backend_cfg_path = "/etc/strelka/backend.yaml"
-    elif os.path.exists(files('strelka.config').joinpath('backend.yaml')):
-        backend_cfg_path = files('strelka.config').joinpath('backend.yaml')
+    elif os.path.exists(files("strelka.config").joinpath("backend.yaml")):
+        backend_cfg_path = files("strelka.config").joinpath("backend.yaml")
     else:
         logging.exception("no backend configuration found")
         sys.exit(1)
 
     if os.path.exists("/etc/strelka/taste/taste.yara"):
         taste_path = "/etc/strelka/taste/taste.yara"
-    elif os.path.exists(files('strelka.config').joinpath('taste.yara')):
-        taste_path = str(files('strelka.config').joinpath('taste.yara'))
+    elif os.path.exists(files("strelka.config").joinpath("taste.yara")):
+        taste_path = str(files("strelka.config").joinpath("taste.yara"))
     else:
         logging.exception("no taste path found")
         sys.exit(1)
 
     if os.path.exists("/etc/strelka/yara/rules.yara"):
         yara_rules_path = "/etc/strelka/yara/rules.yara"
-    elif os.path.exists(files('strelka.config').joinpath('rules.yara')):
-        yara_rules_path = str(files('strelka.config').joinpath('rules.yara'))
+    elif os.path.exists(files("strelka.config").joinpath("rules.yara")):
+        yara_rules_path = str(files("strelka.config").joinpath("rules.yara"))
     else:
         logging.exception("no yara rules path found")
         sys.exit(1)
diff --git a/src/python/strelka/auxiliary/xl4ma/analyzer.py b/src/python/strelka/auxiliary/xl4ma/analyzer.py
index 88678e01..6870c303 100644
--- a/src/python/strelka/auxiliary/xl4ma/analyzer.py
+++ b/src/python/strelka/auxiliary/xl4ma/analyzer.py
@@ -5,11 +5,12 @@ import os
 import tempfile
 from pathlib import Path
+
 from strelka.auxiliary.xl4ma.extract import iocs
+from strelka.auxiliary.xl4ma.xl4decoder import decode
 from strelka.auxiliary.xl4ma.xls_wrapper import XLSWrapper
 from strelka.auxiliary.xl4ma.xlsb_wrapper import XLSBWrapper
 from strelka.auxiliary.xl4ma.xlsm_wrapper import XLSMWrapper
-from strelka.auxiliary.xl4ma.xl4decoder import decode
 
 
 def _make_temp_file(data, file_type):
diff --git a/src/python/strelka/auxiliary/xl4ma/extract.py b/src/python/strelka/auxiliary/xl4ma/extract.py
index 978d0039..4948470a 100644
--- a/src/python/strelka/auxiliary/xl4ma/extract.py
+++ b/src/python/strelka/auxiliary/xl4ma/extract.py
@@ -9,11 +9,15 @@ def iocs(excel_doc_decoded):
     extracted = set()
 
     for decoded in excel_doc_decoded:
-        if url := re.findall('(https?://[A-Za-z0-9-._]+/[A-Za-z0-9-._~:/?#\[\]@!$&\'\(\)*+,;%=]+[^,\s\)])', decoded, flags=re.IGNORECASE):
+        if url := re.findall(
+            r"(https?://[A-Za-z0-9-._]+/[A-Za-z0-9-._~:/?#\[\]@!$&'\(\)*+,;%=]+[^,\s\)])",
+            decoded,
+            flags=re.IGNORECASE,
+        ):
             scheme, netloc, path, params, query, fragment = urlparse(url[0])
-            if netloc.startswith('0x'):
+            if netloc.startswith("0x"):
                 netloc = socket.inet_ntoa(struct.pack(">L", int(netloc, 16)))
-            if netloc.startswith('0o'):
+            if netloc.startswith("0o"):
                 netloc = socket.inet_ntoa(struct.pack(">L", int(netloc, 8)))
 
             extracted.add(f"{scheme}://{netloc}{path}")
diff --git a/src/python/strelka/auxiliary/xl4ma/xl4decoder.py b/src/python/strelka/auxiliary/xl4ma/xl4decoder.py
index 40cc35c6..041e0466 100644
--- a/src/python/strelka/auxiliary/xl4ma/xl4decoder.py
+++ b/src/python/strelka/auxiliary/xl4ma/xl4decoder.py
@@ -1,12 +1,13 @@
 # Authors: Ryan Borre
 
 import tempfile
+from os import devnull
+
 import xlrd2
 from openpyxl.workbook import Workbook
 from pyxlsb2 import open_workbook
 from pyxlsb2.formula import Formula
 from pyxlsb2.records import ErrorValue
-from os import devnull
 
 from strelka.auxiliary.xl4ma.xl4interpreter import Interpreter
@@ -47,7 +48,7 @@ def _decode_xls(file_path, defined_names):
                     book_sheet.cell(
                         row + 1, col + 1, wb[sheet_name].cell(row, col).value
                     )
-            except:
+            except Exception:
                 pass
     temp_file = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
     book.save(temp_file.name)
@@ -96,7 +97,7 @@ def _decode_xlsb(file_path, defined_names):
                         cell.col + 1,
                         str(cell.value).rstrip("\x00"),
                     )
-            except:
+            except Exception:
                 pass
     temp_file = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
book.save(temp_file.name) @@ -107,7 +108,7 @@ def _decode_xlsb(file_path, defined_names): # XLSM def _decode_xlsm(file_path, defined_names): - with tempfile.NamedTemporaryFile(suffix=f".xlsm", delete=False) as temp_file, open( + with tempfile.NamedTemporaryFile(suffix=".xlsm", delete=False) as temp_file, open( file_path, "rb" ) as fp: temp_file.write(fp.read()) diff --git a/src/python/strelka/auxiliary/xl4ma/xl4interpreter.py b/src/python/strelka/auxiliary/xl4ma/xl4interpreter.py index 74af028f..0b870a58 100644 --- a/src/python/strelka/auxiliary/xl4ma/xl4interpreter.py +++ b/src/python/strelka/auxiliary/xl4ma/xl4interpreter.py @@ -1,9 +1,10 @@ # Authors: Ryan Borre import logging -import formulas import os +import formulas + class Interpreter: def __init__(self, defined_names): @@ -159,7 +160,7 @@ def calculate(self, temp_file): if isinstance(result, str): self.results.add(str(result)) - except: + except Exception: logging.info("formula error") temp_file.close() diff --git a/src/python/strelka/auxiliary/xl4ma/xls_wrapper.py b/src/python/strelka/auxiliary/xl4ma/xls_wrapper.py index 68755c27..3e806694 100644 --- a/src/python/strelka/auxiliary/xl4ma/xls_wrapper.py +++ b/src/python/strelka/auxiliary/xl4ma/xls_wrapper.py @@ -1,10 +1,11 @@ # Authors: Ryan Borre -import xlrd -import xlrd2 from enum import Enum from os import devnull +import xlrd +import xlrd2 + class VISIBILITY(Enum): VISIBLE = 0 @@ -73,7 +74,7 @@ def parse_sheets(self, file_path): .value, } ) - except: + except Exception: pass results["sheets"].append( diff --git a/src/python/strelka/auxiliary/xl4ma/xlsb_wrapper.py b/src/python/strelka/auxiliary/xl4ma/xlsb_wrapper.py index ed8245d0..b9ddfa9e 100644 --- a/src/python/strelka/auxiliary/xl4ma/xlsb_wrapper.py +++ b/src/python/strelka/auxiliary/xl4ma/xlsb_wrapper.py @@ -9,7 +9,7 @@ class XLSBWrapper: def __init__(self, file_path): try: self.workbook = open_workbook(file_path) - except Exception as e: + except Exception: return def get_defined_names(self): @@ -54,7 +54,7 @@ def parse_sheets(self, file_path): "value": cell.value, } ) - except: + except Exception: pass results["sheets"].append( { diff --git a/src/python/strelka/auxiliary/xl4ma/xlsm_wrapper.py b/src/python/strelka/auxiliary/xl4ma/xlsm_wrapper.py index dd34c28d..2a76d36b 100644 --- a/src/python/strelka/auxiliary/xl4ma/xlsm_wrapper.py +++ b/src/python/strelka/auxiliary/xl4ma/xlsm_wrapper.py @@ -1,6 +1,7 @@ # Authors: Ryan Borre import re + from openpyxl import load_workbook @@ -8,7 +9,7 @@ class XLSMWrapper: def __init__(self, file_path): try: self.workbook = load_workbook(file_path, read_only=False, keep_vba=True) - except Exception as e: + except Exception: return def get_defined_names(self): @@ -36,7 +37,7 @@ def parse_sheets(self, file_path): formulas.append( {"cell": cell.coordinate, "value": cell.value} ) - if re.match("^=?\w+?\(\)$", cell.value): + if re.match(r"^=?\w+?\(\)$", cell.value): worksheet = self.workbook[sheet] worksheet[cell.coordinate] = "" elif cell.data_type == "n": diff --git a/src/python/strelka/cstructs/bits/ese.py b/src/python/strelka/cstructs/bits/ese.py index 9be58fe6..e690fda3 100644 --- a/src/python/strelka/cstructs/bits/ese.py +++ b/src/python/strelka/cstructs/bits/ese.py @@ -8,245 +8,248 @@ # Author: Alberto Solino (@agsolino) from collections import OrderedDict +from struct import pack, unpack + from .structure import Structure -from struct import unpack, pack # Constants -FILE_TYPE_DATABASE = 0 +FILE_TYPE_DATABASE = 0 FILE_TYPE_STREAMING_FILE = 1 # Database state -JET_dbstateJustCreated = 1 
-JET_dbstateDirtyShutdown = 2 -JET_dbstateCleanShutdown = 3 +JET_dbstateJustCreated = 1 +JET_dbstateDirtyShutdown = 2 +JET_dbstateCleanShutdown = 3 JET_dbstateBeingConverted = 4 -JET_dbstateForceDetach = 5 +JET_dbstateForceDetach = 5 # Page Flags -FLAGS_ROOT = 1 -FLAGS_LEAF = 2 -FLAGS_PARENT = 4 -FLAGS_EMPTY = 8 -FLAGS_SPACE_TREE = 0x20 -FLAGS_INDEX = 0x40 -FLAGS_LONG_VALUE = 0x80 -FLAGS_NEW_FORMAT = 0x2000 +FLAGS_ROOT = 1 +FLAGS_LEAF = 2 +FLAGS_PARENT = 4 +FLAGS_EMPTY = 8 +FLAGS_SPACE_TREE = 0x20 +FLAGS_INDEX = 0x40 +FLAGS_LONG_VALUE = 0x80 +FLAGS_NEW_FORMAT = 0x2000 FLAGS_NEW_CHECKSUM = 0x2000 # Tag Flags TAG_UNKNOWN = 0x1 TAG_DEFUNCT = 0x2 -TAG_COMMON = 0x4 +TAG_COMMON = 0x4 # Fixed Page Numbers -DATABASE_PAGE_NUMBER = 1 -CATALOG_PAGE_NUMBER = 4 -CATALOG_BACKUP_PAGE_NUMBER = 24 +DATABASE_PAGE_NUMBER = 1 +CATALOG_PAGE_NUMBER = 4 +CATALOG_BACKUP_PAGE_NUMBER = 24 # Fixed FatherDataPages -DATABASE_FDP = 1 -CATALOG_FDP = 2 -CATALOG_BACKUP_FDP = 3 +DATABASE_FDP = 1 +CATALOG_FDP = 2 +CATALOG_BACKUP_FDP = 3 # Catalog Types -CATALOG_TYPE_TABLE = 1 -CATALOG_TYPE_COLUMN = 2 -CATALOG_TYPE_INDEX = 3 -CATALOG_TYPE_LONG_VALUE = 4 -CATALOG_TYPE_CALLBACK = 5 +CATALOG_TYPE_TABLE = 1 +CATALOG_TYPE_COLUMN = 2 +CATALOG_TYPE_INDEX = 3 +CATALOG_TYPE_LONG_VALUE = 4 +CATALOG_TYPE_CALLBACK = 5 # Column Types -JET_coltypNil = 0 -JET_coltypBit = 1 +JET_coltypNil = 0 +JET_coltypBit = 1 JET_coltypUnsignedByte = 2 -JET_coltypShort = 3 -JET_coltypLong = 4 -JET_coltypCurrency = 5 -JET_coltypIEEESingle = 6 -JET_coltypIEEEDouble = 7 -JET_coltypDateTime = 8 -JET_coltypBinary = 9 -JET_coltypText = 10 -JET_coltypLongBinary = 11 -JET_coltypLongText = 12 -JET_coltypSLV = 13 +JET_coltypShort = 3 +JET_coltypLong = 4 +JET_coltypCurrency = 5 +JET_coltypIEEESingle = 6 +JET_coltypIEEEDouble = 7 +JET_coltypDateTime = 8 +JET_coltypBinary = 9 +JET_coltypText = 10 +JET_coltypLongBinary = 11 +JET_coltypLongText = 12 +JET_coltypSLV = 13 JET_coltypUnsignedLong = 14 -JET_coltypLongLong = 15 -JET_coltypGUID = 16 -JET_coltypUnsignedShort= 17 -JET_coltypMax = 18 +JET_coltypLongLong = 15 +JET_coltypGUID = 16 +JET_coltypUnsignedShort = 17 +JET_coltypMax = 18 ColumnTypeToName = { - JET_coltypNil : 'NULL', - JET_coltypBit : 'Boolean', - JET_coltypUnsignedByte : 'Signed byte', - JET_coltypShort : 'Signed short', - JET_coltypLong : 'Signed long', - JET_coltypCurrency : 'Currency', - JET_coltypIEEESingle : 'Single precision FP', - JET_coltypIEEEDouble : 'Double precision FP', - JET_coltypDateTime : 'DateTime', - JET_coltypBinary : 'Binary', - JET_coltypText : 'Text', - JET_coltypLongBinary : 'Long Binary', - JET_coltypLongText : 'Long Text', - JET_coltypSLV : 'Obsolete', - JET_coltypUnsignedLong : 'Unsigned long', - JET_coltypLongLong : 'Long long', - JET_coltypGUID : 'GUID', - JET_coltypUnsignedShort: 'Unsigned short', - JET_coltypMax : 'Max', + JET_coltypNil: "NULL", + JET_coltypBit: "Boolean", + JET_coltypUnsignedByte: "Signed byte", + JET_coltypShort: "Signed short", + JET_coltypLong: "Signed long", + JET_coltypCurrency: "Currency", + JET_coltypIEEESingle: "Single precision FP", + JET_coltypIEEEDouble: "Double precision FP", + JET_coltypDateTime: "DateTime", + JET_coltypBinary: "Binary", + JET_coltypText: "Text", + JET_coltypLongBinary: "Long Binary", + JET_coltypLongText: "Long Text", + JET_coltypSLV: "Obsolete", + JET_coltypUnsignedLong: "Unsigned long", + JET_coltypLongLong: "Long long", + JET_coltypGUID: "GUID", + JET_coltypUnsignedShort: "Unsigned short", + JET_coltypMax: "Max", } ColumnTypeSize = { - JET_coltypNil : None, - JET_coltypBit : 
(1,'B'), - JET_coltypUnsignedByte : (1,'B'), - JET_coltypShort : (2,' 8192: self.structure += self.extended_win7 - Structure.__init__(self,data) + Structure.__init__(self, data) + class ESENT_ROOT_HEADER(Structure): structure = ( - ('InitialNumberOfPages',' 0: self.structure = self.common + self.structure - Structure.__init__(self,data) + Structure.__init__(self, data) + class ESENT_LEAF_HEADER(Structure): - structure = ( - ('CommonPageKey',':'), - ) + structure = (("CommonPageKey", ":"),) + class ESENT_LEAF_ENTRY(Structure): - common = ( - ('CommonPageKeySize',' 0: self.structure = self.common + self.structure - Structure.__init__(self,data) + Structure.__init__(self, data) + class ESENT_SPACE_TREE_HEADER(Structure): - structure = ( - ('Unknown','= self.record['FirstAvailablePageTag']: - raise Exception(f'Requested tag number 0x{tagNum:X} exceeds page limit') + if tagNum >= self.record["FirstAvailablePageTag"]: + raise Exception(f"Requested tag number 0x{tagNum:X} exceeds page limit") # The tags are in an array at the end of the page (4 bytes each) if tagNum == 0: tag = self.data[-4:] else: - tag = self.data[-4*(tagNum+1):-4*tagNum] + tag = self.data[-4 * (tagNum + 1) : -4 * tagNum] # Offsets are relative to the ESENT_PAGE struct baseOffset = len(self.record) # New database format uses 15-bit numbers for size and offset - if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] >= 17 and self.__DBHeader['PageSize'] > 8192: - valueSize = unpack('= 17 + and self.__DBHeader["PageSize"] > 8192 + ): + valueSize = unpack("> 5 - tmpData[1] = tmpData[1] & 0x1f + tmpData[1] = tmpData[1] & 0x1F tagData = bytes(tmpData) else: - valueSize = unpack('> 13 - valueOffset = unpack('> 13 + valueOffset = unpack(" 127: - numEntries = dataDefinitionHeader['LastVariableDataType'] - 127 + if dataDefinitionHeader["LastVariableDataType"] > 127: + numEntries = dataDefinitionHeader["LastVariableDataType"] - 127 else: - numEntries = dataDefinitionHeader['LastVariableDataType'] - - itemLen = unpack(' len(data): continue # If there is common key data defined, prepend it to the current key data if common_key_data: - key_data = common_key_data + data[cur_offset:cur_offset+key_size] + key_data = ( + common_key_data + data[cur_offset : cur_offset + key_size] + ) else: - key_data = data[cur_offset:cur_offset+key_size] + key_data = data[cur_offset : cur_offset + key_size] cur_offset += key_size # The value contains all data in the tag after the key value_size = len(data) - cur_offset - value_data = data[cur_offset:cur_offset+value_size] + value_data = data[cur_offset : cur_offset + value_size] longValues[key_data] = value_data return longValues @@ -535,7 +553,7 @@ def getLongValue(self, cursor, data): try: # For some reason the ID is stored big endian data_be = data[::-1] - long_data = cursor['LongValues'].get(data_be) + long_data = cursor["LongValues"].get(data_be) # The initial long entry contains an unknown value and total long data size _, long_data_size = unpack("I", cur_offset) - cur_data = cursor['LongValues'].get(cur_key) + cur_data = cursor["LongValues"].get(cur_key) # If there was no more data but we still had some data, return it if not cur_data and len(combined_data) > 0: return bytes(combined_data) @@ -564,11 +582,15 @@ def parsePage(self, page): """Parses a catalog page and adds relevant information to the table structures""" # Safety check to exclude page types that should not be in the catalog - if page.record['PageFlags'] & (FLAGS_LEAF | FLAGS_SPACE_TREE | FLAGS_INDEX | 
FLAGS_LONG_VALUE) == 0: + if ( + page.record["PageFlags"] + & (FLAGS_LEAF | FLAGS_SPACE_TREE | FLAGS_INDEX | FLAGS_LONG_VALUE) + == 0 + ): return # Enumerate tags in the page - for tagNum in range(1,page.record['FirstAvailablePageTag']): + for tagNum in range(1, page.record["FirstAvailablePageTag"]): flags, data = page.getTag(tagNum) leafEntry = ESENT_LEAF_ENTRY(flags, data) self.__addItem(leafEntry) @@ -579,16 +601,16 @@ def parseCatalog(self, pageNum): self.parsePage(page) # Recursively process referenced pages from branch page tags - if page.record['PageFlags'] & FLAGS_LEAF == 0: - for i in range(1, page.record['FirstAvailablePageTag']): + if page.record["PageFlags"] & FLAGS_LEAF == 0: + for i in range(1, page.record["FirstAvailablePageTag"]): flags, data = page.getTag(i) branchEntry = ESENT_BRANCH_ENTRY(flags, data) - self.parseCatalog(branchEntry['ChildPageNumber']) + self.parseCatalog(branchEntry["ChildPageNumber"]) def getPage(self, pageNum): """Reads the specified page and parses headers (except on the root page)""" - offset = (pageNum+1)*self.__pageSize - data = self.__fileData[offset:offset+self.__pageSize] + offset = (pageNum + 1) * self.__pageSize + data = self.__fileData[offset : offset + self.__pageSize] # Special case for the first page if pageNum <= 0: @@ -607,41 +629,45 @@ def openTable(self, tableName): if not cur_table: return None - entry = cur_table['TableEntry'] - dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData']) - catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):]) - + entry = cur_table["TableEntry"] + dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry["EntryData"]) + catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY( + entry["EntryData"][len(dataDefinitionHeader) :] + ) + # Find the first leaf node - pageNum = catalogEntry['FatherDataPageNumber'] + pageNum = catalogEntry["FatherDataPageNumber"] done = False while not done: page = self.getPage(pageNum) # If there are no records, return the first page - if page.record['FirstAvailablePageTag'] <= 1: + if page.record["FirstAvailablePageTag"] <= 1: done = True # Enumerate tags for the current page - for i in range(1, page.record['FirstAvailablePageTag']): + for i in range(1, page.record["FirstAvailablePageTag"]): flags, data = page.getTag(i) # If this is a branch node, check child page - if page.record['PageFlags'] & FLAGS_LEAF == 0: + if page.record["PageFlags"] & FLAGS_LEAF == 0: branchEntry = ESENT_BRANCH_ENTRY(flags, data) - pageNum = branchEntry['ChildPageNumber'] + pageNum = branchEntry["ChildPageNumber"] break # Otherwise, stop else: done = True break - + cursor = TABLE_CURSOR - cursor['TableData'] = self.__tables[tableName] - cursor['FatherDataPageNumber'] = catalogEntry['FatherDataPageNumber'] - cursor['CurrentPageData'] = page - cursor['CurrentTag'] = 0 + cursor["TableData"] = self.__tables[tableName] + cursor["FatherDataPageNumber"] = catalogEntry["FatherDataPageNumber"] + cursor["CurrentPageData"] = page + cursor["CurrentTag"] = 0 # Create a mapping of the long values tree - cursor['LongValues'] = self.__getLongValues(cursor['TableData']['LongValues']['FatherDataPageNumber']) + cursor["LongValues"] = self.__getLongValues( + cursor["TableData"]["LongValues"]["FatherDataPageNumber"] + ) return cursor @@ -650,34 +676,34 @@ def __getNextTag(self, cursor): Given a cursor, finds the next valid tag in the page. 
Returns None when the end of the tags are reached for the current page or if the current page is not a leaf since the tags are actually branches. """ - page = cursor['CurrentPageData'] + page = cursor["CurrentPageData"] # If this isn't a leaf page, move to the next page - if page.record['PageFlags'] & FLAGS_LEAF == 0: + if page.record["PageFlags"] & FLAGS_LEAF == 0: return None # Find the next non-defunct tag tag_flags = None tag_data = None - while cursor['CurrentTag'] < page.record['FirstAvailablePageTag']: - tag_flags, tag_data = page.getTag(cursor['CurrentTag']) + while cursor["CurrentTag"] < page.record["FirstAvailablePageTag"]: + tag_flags, tag_data = page.getTag(cursor["CurrentTag"]) if tag_flags & TAG_DEFUNCT: - cursor['CurrentTag'] += 1 + cursor["CurrentTag"] += 1 continue else: break # If we have reached the last tag of this page, return None to move to the next page - if cursor['CurrentTag'] >= page.record['FirstAvailablePageTag']: + if cursor["CurrentTag"] >= page.record["FirstAvailablePageTag"]: return None # Check for unexpected page flags - if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0: - raise Exception('FLAGS_SPACE_TREE > 0') - elif page.record['PageFlags'] & FLAGS_INDEX > 0: - raise Exception('FLAGS_INDEX > 0') - elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0: - raise Exception('FLAGS_LONG_VALUE > 0') + if page.record["PageFlags"] & FLAGS_SPACE_TREE > 0: + raise Exception("FLAGS_SPACE_TREE > 0") + elif page.record["PageFlags"] & FLAGS_INDEX > 0: + raise Exception("FLAGS_INDEX > 0") + elif page.record["PageFlags"] & FLAGS_LONG_VALUE > 0: + raise Exception("FLAGS_LONG_VALUE > 0") # Return the tag entry leafEntry = ESENT_LEAF_ENTRY(tag_flags, tag_data) @@ -687,22 +713,22 @@ def getNextRow(self, cursor): """Retrieves the next row (aka tag) for the given cursor position in the table""" # Increment the tag number and get the next valid tag from the current page - cursor['CurrentTag'] += 1 + cursor["CurrentTag"] += 1 tag = self.__getNextTag(cursor) # If there are no more tags on this page, try the next page if tag is None: - page = cursor['CurrentPageData'] - if page.record['NextPageNumber'] == 0: + page = cursor["CurrentPageData"] + if page.record["NextPageNumber"] == 0: return None else: - cursor['CurrentPageData'] = self.getPage(page.record['NextPageNumber']) - cursor['CurrentTag'] = 0 + cursor["CurrentPageData"] = self.getPage(page.record["NextPageNumber"]) + cursor["CurrentTag"] = 0 return self.getNextRow(cursor) # Otherwise, parse the current tag data into a record (resolving columns, long values, etc.) else: - return self.__tagToRecord(cursor, tag['EntryData']) + return self.__tagToRecord(cursor, tag["EntryData"]) def __tagToRecord(self, cursor, tag): # So my brain doesn't forget, the data record is composed of: @@ -721,7 +747,7 @@ def __tagToRecord(self, cursor, tag): # values, size. # # The interesting thing about this DB records is there's no need for all the columns to be there, hence - # saving space. That's why I got over all the columns, and if I find data (of any type), i assign it. If + # saving space. That's why I got over all the columns, and if I find data (of any type), i assign it. If # not, the column's empty. 
record = OrderedDict() @@ -729,78 +755,105 @@ def __tagToRecord(self, cursor, tag): taggedItemsParsed = False dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(tag) - variableDataBytesProcessed = (dataDefinitionHeader['LastVariableDataType'] - 127) * 2 + variableDataBytesProcessed = ( + dataDefinitionHeader["LastVariableDataType"] - 127 + ) * 2 prevItemLen = 0 tagLen = len(tag) fixedSizeOffset = len(dataDefinitionHeader) - variableSizeOffset = dataDefinitionHeader['VariableSizeOffset'] - - columns = cursor['TableData']['Columns'] - + variableSizeOffset = dataDefinitionHeader["VariableSizeOffset"] + + columns = cursor["TableData"]["Columns"] + for column in list(columns.keys()): - columnRecord = columns[column]['Record'] - if columnRecord['Identifier'] <= dataDefinitionHeader['LastFixedSize']: + columnRecord = columns[column]["Record"] + if columnRecord["Identifier"] <= dataDefinitionHeader["LastFixedSize"]: # Fixed Size column data type, still available data - record[column] = tag[fixedSizeOffset:][:columnRecord['SpaceUsage']] - fixedSizeOffset += columnRecord['SpaceUsage'] - - elif 127 < columnRecord['Identifier'] <= dataDefinitionHeader['LastVariableDataType']: + record[column] = tag[fixedSizeOffset:][: columnRecord["SpaceUsage"]] + fixedSizeOffset += columnRecord["SpaceUsage"] + + elif ( + 127 + < columnRecord["Identifier"] + <= dataDefinitionHeader["LastVariableDataType"] + ): # Variable data type - index = columnRecord['Identifier'] - 127 - 1 - itemLen = unpack(' 255: + elif columnRecord["Identifier"] > 255: # Have we parsed the tagged items already? - if taggedItemsParsed is False and (variableDataBytesProcessed+variableSizeOffset) < tagLen: - index = variableDataBytesProcessed+variableSizeOffset + if ( + taggedItemsParsed is False + and (variableDataBytesProcessed + variableSizeOffset) < tagLen + ): + index = variableDataBytesProcessed + variableSizeOffset endOfVS = self.__pageSize - firstOffsetTag = (unpack('= 17 and self.__DBHeader['PageSize'] > 8192: + if ( + self.__DBHeader["Version"] == 0x620 + and self.__DBHeader["FileFormatRevision"] >= 17 + and self.__DBHeader["PageSize"] > 8192 + ): flagsPresent = 1 else: - flagsPresent = (unpack('= firstOffsetTag: # We reached the end of the variable size array break - + # Calculate length of variable items prevKey = list(taggedItems.keys())[0] - for i in range(1,len(taggedItems)): + for i in range(1, len(taggedItems)): offset0, length, flags = taggedItems[prevKey] offset, _, _ = list(taggedItems.items())[i][1] - taggedItems[prevKey] = (offset0, offset-offset0, flags) + taggedItems[prevKey] = (offset0, offset - offset0, flags) prevKey = list(taggedItems.keys())[i] taggedItemsParsed = True - + # Tagged data type - if columnRecord['Identifier'] in taggedItems: - offsetItem = variableDataBytesProcessed + variableSizeOffset + taggedItems[columnRecord['Identifier']][0] - itemSize = taggedItems[columnRecord['Identifier']][1] + if columnRecord["Identifier"] in taggedItems: + offsetItem = ( + variableDataBytesProcessed + + variableSizeOffset + + taggedItems[columnRecord["Identifier"]][0] + ) + itemSize = taggedItems[columnRecord["Identifier"]][1] # If the item has flags, get them and adjust offset - if taggedItems[columnRecord['Identifier']][2] > 0: - itemFlag = ord(tag[offsetItem:offsetItem+1]) + if taggedItems[columnRecord["Identifier"]][2] > 0: + itemFlag = ord(tag[offsetItem : offsetItem + 1]) record.flags = itemFlag offsetItem += 1 itemSize -= 1 @@ -808,17 +861,17 @@ def __tagToRecord(self, cursor, tag): itemFlag = 0 # Compressed data not 
currently handled - if itemFlag & (TAGGED_DATA_TYPE_COMPRESSED ): + if itemFlag & (TAGGED_DATA_TYPE_COMPRESSED): record[column] = None # Long values elif itemFlag & TAGGED_DATA_TYPE_STORED: - data = tag[offsetItem:offsetItem+itemSize] + data = tag[offsetItem : offsetItem + itemSize] record[column] = self.getLongValue(cursor, data) elif itemFlag & TAGGED_DATA_TYPE_MULTI_VALUE: - record[column] = (tag[offsetItem:offsetItem+itemSize],) + record[column] = (tag[offsetItem : offsetItem + itemSize],) else: - record[column] = tag[offsetItem:offsetItem+itemSize] + record[column] = tag[offsetItem : offsetItem + itemSize] else: record[column] = None @@ -829,12 +882,17 @@ def __tagToRecord(self, cursor, tag): if type(record[column]) is tuple: # Not decoding multi value data record[column] = record[column][0] - elif columnRecord['ColumnType'] == JET_coltypText or columnRecord['ColumnType'] == JET_coltypLongText: + elif ( + columnRecord["ColumnType"] == JET_coltypText + or columnRecord["ColumnType"] == JET_coltypLongText + ): # Strings if record[column] is not None: - if columnRecord['CodePage'] not in StringCodePages: - raise Exception('Unknown codepage 0x%x'% columnRecord['CodePage']) - stringDecoder = StringCodePages[columnRecord['CodePage']] + if columnRecord["CodePage"] not in StringCodePages: + raise Exception( + "Unknown codepage 0x%x" % columnRecord["CodePage"] + ) + stringDecoder = StringCodePages[columnRecord["CodePage"]] try: record[column] = record[column].decode(stringDecoder) @@ -842,7 +900,7 @@ def __tagToRecord(self, cursor, tag): record[column] = record[column].decode(stringDecoder, "replace") pass else: - unpackData = ColumnTypeSize[columnRecord['ColumnType']] + unpackData = ColumnTypeSize[columnRecord["ColumnType"]] if record[column] is not None and unpackData is not None: unpackStr = unpackData[1] record[column] = unpack(unpackStr, record[column])[0] diff --git a/src/python/strelka/cstructs/bits/structure.py b/src/python/strelka/cstructs/bits/structure.py index 2acc1139..be8a2679 100644 --- a/src/python/strelka/cstructs/bits/structure.py +++ b/src/python/strelka/cstructs/bits/structure.py @@ -5,79 +5,81 @@ # for more information. # -from struct import pack, unpack, calcsize +from struct import calcsize, pack, unpack + class Structure: - """ sublcasses can define commonHdr and/or structure. - each of them is an tuple of either two: (fieldName, format) or three: (fieldName, ':', class) fields. - [it can't be a dictionary, because order is important] - - where format specifies how the data in the field will be converted to/from bytes (string) - class is the class to use when unpacking ':' fields. - - each field can only contain one value (or an array of values for *) - i.e. 
struct.pack('Hl',1,2) is valid, but format specifier 'Hl' is not (you must use 2 dfferent fields) - - format specifiers: - specifiers from module pack can be used with the same format - see struct.__doc__ (pack/unpack is finally called) - x [padding byte] - c [character] - b [signed byte] - B [unsigned byte] - h [signed short] - H [unsigned short] - l [signed long] - L [unsigned long] - i [signed integer] - I [unsigned integer] - q [signed long long (quad)] - Q [unsigned long long (quad)] - s [string (array of chars), must be preceded with length in format specifier, padded with zeros] - p [pascal string (includes byte count), must be preceded with length in format specifier, padded with zeros] - f [float] - d [double] - = [native byte ordering, size and alignment] - @ [native byte ordering, standard size and alignment] - ! [network byte ordering] - < [little endian] - > [big endian] - - usual printf like specifiers can be used (if started with %) - [not recommended, there is no way to unpack this] - - %08x will output an 8 bytes hex - %s will output a string - %s\\x00 will output a NUL terminated string - %d%d will output 2 decimal digits (against the very same specification of Structure) - ... - - some additional format specifiers: - : just copy the bytes from the field into the output string (input may be string, other structure, or anything responding to __str__()) (for unpacking, all what's left is returned) - z same as :, but adds a NUL byte at the end (asciiz) (for unpacking the first NUL byte is used as terminator) [asciiz string] - u same as z, but adds two NUL bytes at the end (after padding to an even size with NULs). (same for unpacking) [unicode string] - w DCE-RPC/NDR string (it's a macro for [ ' [big endian] + + usual printf like specifiers can be used (if started with %) + [not recommended, there is no way to unpack this] + + %08x will output an 8 bytes hex + %s will output a string + %s\\x00 will output a NUL terminated string + %d%d will output 2 decimal digits (against the very same specification of Structure) + ... + + some additional format specifiers: + : just copy the bytes from the field into the output string (input may be string, other structure, or anything responding to __str__()) (for unpacking, all what's left is returned) + z same as :, but adds a NUL byte at the end (asciiz) (for unpacking the first NUL byte is used as terminator) [asciiz string] + u same as z, but adds two NUL bytes at the end (after padding to an even size with NULs). 
(same for unpacking) [unicode string] + w DCE-RPC/NDR string (it's a macro for [ ' 2: dataClassOrCode = field[2] try: - self[field[0]] = self.unpack(field[1], data[:size], dataClassOrCode = dataClassOrCode, field = field[0]) + self[field[0]] = self.unpack( + field[1], + data[:size], + dataClassOrCode=dataClassOrCode, + field=field[0], + ) except Exception as e: - e.args += ("When unpacking field '%s | %s | %r[:%d]'" % (field[0], field[1], data, size),) + e.args += ( + "When unpacking field '%s | %s | %r[:%d]'" + % (field[0], field[1], data, size), + ) raise size = self.calcPackSize(field[1], self[field[0]], field[0]) @@ -145,17 +161,17 @@ def fromString(self, data): data = data[size:] return self - + def __setitem__(self, key, value): self.fields[key] = value - self.data = None # force recompute + self.data = None # force recompute def __getitem__(self, key): return self.fields[key] def __delitem__(self, key): del self.fields[key] - + def __str__(self): return self.getData() @@ -163,52 +179,54 @@ def __len__(self): # XXX: improve return len(self.getData()) - def pack(self, format, data, field = None): + def pack(self, format, data, field=None): if field: addressField = self.findAddressFieldFor(field) if (addressField is not None) and (data is None): - return b'' + return b"" # void specifier - if format[:1] == '_': - return b'' + if format[:1] == "_": + return b"" # quote specifier if format[:1] == "'" or format[:1] == '"': return format[1:].encode("latin-1") # code specifier - two = format.split('=') + two = format.split("=") if len(two) >= 2: try: return self.pack(two[0], data) - except: - fields = {'self':self} + except Exception: + fields = {"self": self} fields.update(self.fields) return self.pack(two[0], eval(two[1], {}, fields)) # address specifier - two = format.split('&') + two = format.split("&") if len(two) == 2: try: return self.pack(two[0], data) - except: + except Exception: if (two[1] in self.fields) and (self[two[1]] is not None): - return self.pack(two[0], id(self[two[1]]) & ((1<<(calcsize(two[0])*8))-1) ) + return self.pack( + two[0], id(self[two[1]]) & ((1 << (calcsize(two[0]) * 8)) - 1) + ) else: return self.pack(two[0], 0) # length specifier - two = format.split('-') + two = format.split("-") if len(two) == 2: try: - return self.pack(two[0],data) - except: + return self.pack(two[0], data) + except Exception: return self.pack(two[0], self.calcPackFieldSize(two[1])) # array specifier - two = format.split('*') + two = format.split("*") if len(two) == 2: answer = bytes() for each in data: @@ -216,40 +234,42 @@ def pack(self, format, data, field = None): if two[0]: if two[0].isdigit(): if int(two[0]) != len(data): - raise Exception("Array field has a constant size, and it doesn't match the actual value") + raise Exception( + "Array field has a constant size, and it doesn't match the actual value" + ) else: - return self.pack(two[0], len(data))+answer + return self.pack(two[0], len(data)) + answer return answer # "printf" string specifier - if format[:1] == '%': + if format[:1] == "%": # format string like specifier return (format % data).encode("latin-1") # asciiz specifier - if format[:1] == 'z': - if isinstance(data,bytes): - return data + b'\0' - return bytes(data)+b'\0' + if format[:1] == "z": + if isinstance(data, bytes): + return data + b"\0" + return bytes(data) + b"\0" # unicode specifier - if format[:1] == 'u': - return bytes(data+b'\0\0' + (len(data) & 1 and b'\0' or b'')) + if format[:1] == "u": + return bytes(data + b"\0\0" + (len(data) & 1 and b"\0" or b"")) # 
DCE-RPC/NDR string specifier - if format[:1] == 'w': + if format[:1] == "w": if len(data) == 0: - data = b'\0\0' + data = b"\0\0" elif len(data) % 2: - data = data.encode("latin-1") + b'\0' - l = pack('= 2: - return self.unpack(two[0],data) + return self.unpack(two[0], data) # length specifier - two = format.split('-') + two = format.split("-") if len(two) == 2: - return self.unpack(two[0],data) + return self.unpack(two[0], data) # array specifier - two = format.split('*') + two = format.split("*") if len(two) == 2: answer = [] sofar = 0 @@ -325,39 +348,40 @@ def unpack(self, format, data, dataClassOrCode = None, field = None): number = -1 while number and sofar < len(data): - nsofar = sofar + self.calcUnpackSize(two[1],data[sofar:]) + nsofar = sofar + self.calcUnpackSize(two[1], data[sofar:]) answer.append(self.unpack(two[1], data[sofar:nsofar], dataClassOrCode)) number -= 1 sofar = nsofar return answer # "printf" string specifier - if format[:1] == '%': + if format[:1] == "%": # format string like specifier return format % data # asciiz specifier - if format == 'z': - if data[-1:] != b'\x00': - raise Exception("%s 'z' field is not NUL terminated: %r" % (field, data)) - if PY3: - return data[:-1].decode('latin-1') - else: - return data[:-1] + if format == "z": + if data[-1:] != b"\x00": + raise Exception( + "%s 'z' field is not NUL terminated: %r" % (field, data) + ) + return data[:-1] # unicode specifier - if format == 'u': - if data[-2:] != b'\x00\x00': - raise Exception("%s 'u' field is not NUL-NUL terminated: %r" % (field, data)) - return data[:-2] # remove trailing NUL + if format == "u": + if data[-2:] != b"\x00\x00": + raise Exception( + "%s 'u' field is not NUL-NUL terminated: %r" % (field, data) + ) + return data[:-2] # remove trailing NUL # DCE-RPC/NDR string specifier - if format == 'w': - l = unpack('= 2: return self.calcPackSize(two[0], data) # length specifier - two = format.split('-') + two = format.split("-") if len(two) == 2: return self.calcPackSize(two[0], data) # array specifier - two = format.split('*') + two = format.split("*") if len(two) == 2: answer = 0 if two[0].isdigit(): - if int(two[0]) != len(data): - raise Exception("Array field has a constant size, and it doesn't match the actual value") + if int(two[0]) != len(data): + raise Exception( + "Array field has a constant size, and it doesn't match the actual value" + ) elif two[0]: answer += self.calcPackSize(two[0], len(data)) @@ -410,35 +436,35 @@ def calcPackSize(self, format, data, field = None): return answer # "printf" string specifier - if format[:1] == '%': + if format[:1] == "%": # format string like specifier return len(format % data) # asciiz specifier - if format[:1] == 'z': - return len(data)+1 + if format[:1] == "z": + return len(data) + 1 # asciiz specifier - if format[:1] == 'u': - l = len(data) - return l + (l & 1 and 3 or 2) + if format[:1] == "u": + length = len(data) + return length + (length & 1 and 3 or 2) # DCE-RPC/NDR string specifier - if format[:1] == 'w': - l = len(data) - return 12+l+l % 2 + if format[:1] == "w": + length = len(data) + return 12 + length + length % 2 # literal specifier - if format[:1] == ':': + if format[:1] == ":": return len(data) # struct like specifier return calcsize(format) - def calcUnpackSize(self, format, data, field = None): + def calcUnpackSize(self, format, data, field=None): # void specifier - if format[:1] == '_': + if format[:1] == "_": return 0 addressField = self.findAddressFieldFor(field) @@ -453,28 +479,28 @@ def calcUnpackSize(self, format, data, 
field = None): pass # XXX: Try to match to actual values, raise if no match - + # quote specifier if format[:1] == "'" or format[:1] == '"': - return len(format)-1 + return len(format) - 1 # address specifier - two = format.split('&') + two = format.split("&") if len(two) == 2: return self.calcUnpackSize(two[0], data) # code specifier - two = format.split('=') + two = format.split("=") if len(two) >= 2: return self.calcUnpackSize(two[0], data) # length specifier - two = format.split('-') + two = format.split("-") if len(two) == 2: return self.calcUnpackSize(two[0], data) # array specifier - two = format.split('*') + two = format.split("*") if len(two) == 2: answer = 0 if two[0]: @@ -493,72 +519,74 @@ def calcUnpackSize(self, format, data, field = None): return answer # "printf" string specifier - if format[:1] == '%': - raise Exception("Can't guess the size of a printf like specifier for unpacking") + if format[:1] == "%": + raise Exception( + "Can't guess the size of a printf like specifier for unpacking" + ) # asciiz specifier - if format[:1] == 'z': - return data.index(b'\x00')+1 + if format[:1] == "z": + return data.index(b"\x00") + 1 # asciiz specifier - if format[:1] == 'u': - l = data.index(b'\x00\x00') - return l + (l & 1 and 3 or 2) + if format[:1] == "u": + dat = data.index(b"\x00\x00") + return dat + (dat & 1 and 3 or 2) # DCE-RPC/NDR string specifier - if format[:1] == 'w': - l = unpack('?@[\\]^_`{|}~ ': - return chr(x) + if ( + chr(x) + in "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ " + ): + return chr(x) else: - return u'.' + return "." diff --git a/src/python/strelka/cstructs/lnk.py b/src/python/strelka/cstructs/lnk.py index 7ae88caa..cebd5ce3 100644 --- a/src/python/strelka/cstructs/lnk.py +++ b/src/python/strelka/cstructs/lnk.py @@ -1,92 +1,113 @@ -from construct import Struct, Int16ul, GreedyRange, Bytes, StringEncoded, this, Int32ul, If, Enum, CString, IfThenElse, BitsSwapped, BitStruct, Flag, Int32sl, Int8ul +from construct import ( + BitsSwapped, + BitStruct, + Bytes, + CString, + Enum, + Flag, + GreedyRange, + If, + IfThenElse, + Int8ul, + Int16ul, + Int32sl, + Int32ul, + StringEncoded, + Struct, + this, +) UnicodeString = "UnicodeString" / Struct( - "Length" / Int32ul, - "Characters" / StringEncoded(Bytes(this.Length * 2), "utf16") + "Length" / Int32ul, "Characters" / StringEncoded(Bytes(this.Length * 2), "utf16") ) LinkTargetIDList = "LinkTargetIDList" / Struct( "IDListSize" / Int16ul, - "ItemID" / GreedyRange(Struct( - "ItemIDSize" / Int16ul, - "Data" / Bytes(this.ItemIDSize - 2), - )), - "TerminalID" / Int16ul + "ItemID" + / GreedyRange( + Struct( + "ItemIDSize" / Int16ul, + "Data" / Bytes(this.ItemIDSize - 2), + ) + ), + "TerminalID" / Int16ul, ) TypedPropertyValue = "TypedPropertyValue" / Struct( - "Type" / Enum(Int16ul, - VT_EMPTY=0x0000, - VT_NULL=0x0001, - VT_I2=0x0002, - VT_I4=0x0003, - VT_R4=0x0004, - VT_R8=0x0005, - VT_CY=0x0006, - VT_DATE=0x0007, - VT_BSTR=0x0008, - VT_ERROR=0x000A, - VT_BOOL=0x000B, - VT_DECIMAL=0x000E, - VT_I1=0x0010, - VT_UI1=0x0011, - VT_UI2=0x0012, - VT_UI4=0x0013, - VT_I8=0x0014, - VT_UI8=0x0015, - VT_INT=0x0016, - VT_UINT=0x0017, - VT_LPSTR=0x001E, - VT_LPWSTR=0x001F, - VT_FILETIME=0x0040, - VT_BLOB=0x0041, - VT_STREAM=0x0042, - VT_STORAGE=0x0043, - VT_STREAMED_Object=0x0044, - VT_STORED_Object=0x0045, - VT_BLOB_Object=0x0046, - VT_CF=0x0047, - VT_CLSID=0x0048, - VT_VERSIONED_STREAM=0x0049, - VT_I2_2=0x1002, - VT_I4_2=0x1003, - VT_R4_2=0x1004, - VT_R8_2=0x1005, - VT_CY_2=0x1006, 
- VT_DATE_2=0x1007, - VT_BSTR_2=0x1008, - VT_ERROR_2=0x100A, - VT_BOOL_2=0x100B, - VT_VARIANT_2=0x100C, - VT_I1_2=0x1010, - VT_UI1_2=0x1011, - VT_UI2_2=0x1012, - VT_UI4_2=0x1013, - VT_I8_2=0x1014, - VT_UI8_2=0x1015, - VT_LPSTR_2=0x101E, - VT_LPWSTR_2=0x101F, - VT_FILETIME_2=0x1040, - VT_CF_2=0x1047, - VT_CLSID_2=0x1048, - VT_I2_3=0x2002, - VT_I4_3=0x2003, - VT_R4_3=0x2004, - VT_R8_3=0x2005, - VT_CY_3=0x2006, - VT_DATE_3=0x2007, - VT_BSTR_3=0x2008, - VT_ERROR_3=0x200A, - VT_BOOL_3=0x200B, - VT_VARIANT_3=0x200C, - VT_DECIMAL_3=0x200E, - VT_I1_3=0x2010, - VT_UI1_3=0x2011, - VT_UI2_3=0x2012, - VT_UI4_3=0x2013, - VT_INT_3=0x2016, - VT_UINT_3=0x2017 - ), + "Type" + / Enum( + Int16ul, + VT_EMPTY=0x0000, + VT_NULL=0x0001, + VT_I2=0x0002, + VT_I4=0x0003, + VT_R4=0x0004, + VT_R8=0x0005, + VT_CY=0x0006, + VT_DATE=0x0007, + VT_BSTR=0x0008, + VT_ERROR=0x000A, + VT_BOOL=0x000B, + VT_DECIMAL=0x000E, + VT_I1=0x0010, + VT_UI1=0x0011, + VT_UI2=0x0012, + VT_UI4=0x0013, + VT_I8=0x0014, + VT_UI8=0x0015, + VT_INT=0x0016, + VT_UINT=0x0017, + VT_LPSTR=0x001E, + VT_LPWSTR=0x001F, + VT_FILETIME=0x0040, + VT_BLOB=0x0041, + VT_STREAM=0x0042, + VT_STORAGE=0x0043, + VT_STREAMED_Object=0x0044, + VT_STORED_Object=0x0045, + VT_BLOB_Object=0x0046, + VT_CF=0x0047, + VT_CLSID=0x0048, + VT_VERSIONED_STREAM=0x0049, + VT_I2_2=0x1002, + VT_I4_2=0x1003, + VT_R4_2=0x1004, + VT_R8_2=0x1005, + VT_CY_2=0x1006, + VT_DATE_2=0x1007, + VT_BSTR_2=0x1008, + VT_ERROR_2=0x100A, + VT_BOOL_2=0x100B, + VT_VARIANT_2=0x100C, + VT_I1_2=0x1010, + VT_UI1_2=0x1011, + VT_UI2_2=0x1012, + VT_UI4_2=0x1013, + VT_I8_2=0x1014, + VT_UI8_2=0x1015, + VT_LPSTR_2=0x101E, + VT_LPWSTR_2=0x101F, + VT_FILETIME_2=0x1040, + VT_CF_2=0x1047, + VT_CLSID_2=0x1048, + VT_I2_3=0x2002, + VT_I4_3=0x2003, + VT_R4_3=0x2004, + VT_R8_3=0x2005, + VT_CY_3=0x2006, + VT_DATE_3=0x2007, + VT_BSTR_3=0x2008, + VT_ERROR_3=0x200A, + VT_BOOL_3=0x200B, + VT_VARIANT_3=0x200C, + VT_DECIMAL_3=0x200E, + VT_I1_3=0x2010, + VT_UI1_3=0x2011, + VT_UI2_3=0x2012, + VT_UI4_3=0x2013, + VT_INT_3=0x2016, + VT_UINT_3=0x2017, + ), "Padding" / Bytes(2), # "Value" / If(this.Type=='VT_LPWSTR', UnicodeString) ) @@ -94,308 +115,360 @@ ExtraData = "ExtraData" / Struct( "BlockSize" / Int32ul, "BlockSignature" / Int32ul, - "ConsoleDataBlock" / If(this.BlockSignature == 0xA0000002, Struct( - "FileAttributes" / Enum(Int16ul, - FOREGROUND_BLUE=0x001, - FOREGROUND_GREEN=0x002, - FOREGROUND_RED=0x004, - FOREGROUND_INTENSITY=0x008, - BACKGROUND_BLUE=0x010, - BACKGROUND_GREEN=0x020, - BACKGROUND_RED=0x040, - BACKGROUND_INTENSITY=0x0080 - ), - "PopupFillAttributes" / Enum(Int16ul, - FOREGROUND_BLUE=0x001, - FOREGROUND_GREEN=0x002, - FOREGROUND_RED=0x004, - FOREGROUND_INTENSITY=0x008, - BACKGROUND_BLUE=0x010, - BACKGROUND_GREEN=0x020, - BACKGROUND_RED=0x040, - BACKGROUND_INTENSITY=0x0080 - ), - "ScreenBufferSizeX" / Int16ul, - "ScreenBufferSizeY" / Int16ul, - "WindowSizeX" / Int16ul, - "WindowSizeY" / Int16ul, - "WindowOriginX" / Int16ul, - "WindowOriginY" / Int16ul, - "Unused1" / Bytes(4), - "Unused2" / Bytes(4), - "FontSize" / Int32ul, - "FontFamily" / Enum(Int32ul, - FF_DONTCARE=0x0000, - FF_ROMAN=0x0010, - FF_SWISS=0x0020, - FF_MODERN=0x0030, - FF_SCRIPT=0x0040, - FF_DECORATIVE=0x0050, - TMPF_NONE=0x0000, - TMPF_FIXED_PITCH=0x0001, - TMPF_VECTOR=0x0002, - TMPF_TRUETYPE=0x0004, - TMPF_DEVICE=0x0004 - ), - "FontWeight" / Int32ul, - "FaceName" / Bytes(64), - "CursorSize" / Int32ul, - "FullScreen" / Int32ul, - "QuickEdit" / Int32ul, - "InsertMode" / Int32ul, - "AutoPosition" / Int32ul, - "HistoryBufferSize" / Int32ul, - 
"NumberOfHistoryBuffers" / Int32ul, - "HistoryNoDup" / Int32ul, - "ColorTable" / Bytes(64) - )), - "ConsoleFEDataBlock" / If(this.BlockSignature == 0xA0000004, Struct( - "CodePage" / Int32ul - )), - "DarwinDataBlock" / If(this.BlockSignature == 0xA0000006, Struct( - "TargetAnsi" / CString("utf8"), - "TargetUnicode" / CString("utf16") - )), - "EnvironmentVariableDataBlock" / If(this.BlockSignature == 0xA0000001, Struct( - "TargetAnsi" / CString("utf8"), - "TargetUnicode" / CString("utf16") - )), - "IconEnvironmentDataBlock" / If(this.BlockSignature == 0xA0000007, Struct( - "TargetAnsi" / CString("utf8"), - "TargetUnicode" / CString("utf16") - )), - "KnownFolderDataBlock" / If(this.BlockSignature == 0xA000000B, Struct( - "KnownFolderID" / Bytes(16), - "Offset" / Int32ul, - )), - "PropertyStoreDataBlock" / If(this.BlockSignature == 0xA0000009, Struct( - "PropertyStore" / Struct( - # "StoreSize" / Int32ul, - "SerializedPropertyStorage" / Struct( - "StorageSize" / Int32ul, - "Version" / Int32ul, - "FormatID" / Bytes(16), - "StringName" / IfThenElse( - this.FormatID == b'\xd5\xcd\xd5\x05\x2e\x9c\x10\x1b\x93\x97\x08\x00\x2b\x2c\xf9\xae', - Struct( - "ValueSize" / Int32ul, - "NameSize" / Int32ul, - "Reserved" / Bytes(1), - "Name" / CString("utf16"), - "TypedPropertyValue" / TypedPropertyValue + "ConsoleDataBlock" + / If( + this.BlockSignature == 0xA0000002, + Struct( + "FileAttributes" + / Enum( + Int16ul, + FOREGROUND_BLUE=0x001, + FOREGROUND_GREEN=0x002, + FOREGROUND_RED=0x004, + FOREGROUND_INTENSITY=0x008, + BACKGROUND_BLUE=0x010, + BACKGROUND_GREEN=0x020, + BACKGROUND_RED=0x040, + BACKGROUND_INTENSITY=0x0080, + ), + "PopupFillAttributes" + / Enum( + Int16ul, + FOREGROUND_BLUE=0x001, + FOREGROUND_GREEN=0x002, + FOREGROUND_RED=0x004, + FOREGROUND_INTENSITY=0x008, + BACKGROUND_BLUE=0x010, + BACKGROUND_GREEN=0x020, + BACKGROUND_RED=0x040, + BACKGROUND_INTENSITY=0x0080, + ), + "ScreenBufferSizeX" / Int16ul, + "ScreenBufferSizeY" / Int16ul, + "WindowSizeX" / Int16ul, + "WindowSizeY" / Int16ul, + "WindowOriginX" / Int16ul, + "WindowOriginY" / Int16ul, + "Unused1" / Bytes(4), + "Unused2" / Bytes(4), + "FontSize" / Int32ul, + "FontFamily" + / Enum( + Int32ul, + FF_DONTCARE=0x0000, + FF_ROMAN=0x0010, + FF_SWISS=0x0020, + FF_MODERN=0x0030, + FF_SCRIPT=0x0040, + FF_DECORATIVE=0x0050, + TMPF_NONE=0x0000, + TMPF_FIXED_PITCH=0x0001, + TMPF_VECTOR=0x0002, + TMPF_TRUETYPE=0x0004, + TMPF_DEVICE=0x0004, + ), + "FontWeight" / Int32ul, + "FaceName" / Bytes(64), + "CursorSize" / Int32ul, + "FullScreen" / Int32ul, + "QuickEdit" / Int32ul, + "InsertMode" / Int32ul, + "AutoPosition" / Int32ul, + "HistoryBufferSize" / Int32ul, + "NumberOfHistoryBuffers" / Int32ul, + "HistoryNoDup" / Int32ul, + "ColorTable" / Bytes(64), + ), + ), + "ConsoleFEDataBlock" + / If(this.BlockSignature == 0xA0000004, Struct("CodePage" / Int32ul)), + "DarwinDataBlock" + / If( + this.BlockSignature == 0xA0000006, + Struct("TargetAnsi" / CString("utf8"), "TargetUnicode" / CString("utf16")), + ), + "EnvironmentVariableDataBlock" + / If( + this.BlockSignature == 0xA0000001, + Struct("TargetAnsi" / CString("utf8"), "TargetUnicode" / CString("utf16")), + ), + "IconEnvironmentDataBlock" + / If( + this.BlockSignature == 0xA0000007, + Struct("TargetAnsi" / CString("utf8"), "TargetUnicode" / CString("utf16")), + ), + "KnownFolderDataBlock" + / If( + this.BlockSignature == 0xA000000B, + Struct( + "KnownFolderID" / Bytes(16), + "Offset" / Int32ul, + ), + ), + "PropertyStoreDataBlock" + / If( + this.BlockSignature == 0xA0000009, + Struct( + "PropertyStore" 
+ / Struct( + # "StoreSize" / Int32ul, + "SerializedPropertyStorage" + / Struct( + "StorageSize" / Int32ul, + "Version" / Int32ul, + "FormatID" / Bytes(16), + "StringName" + / IfThenElse( + this.FormatID + == b"\xd5\xcd\xd5\x05\x2e\x9c\x10\x1b\x93\x97\x08\x00\x2b\x2c\xf9\xae", + Struct( + "ValueSize" / Int32ul, + "NameSize" / Int32ul, + "Reserved" / Bytes(1), + "Name" / CString("utf16"), + "TypedPropertyValue" / TypedPropertyValue, + ), + Struct( + "ValueSize" / Int32ul, + "Id" / Int32ul, + "Reserved" / Bytes(1), + "TypedPropertyValue" / TypedPropertyValue, + ), ), - Struct( - "ValueSize" / Int32ul, - "Id" / Int32ul, - "Reserved" / Bytes(1), - "TypedPropertyValue" / TypedPropertyValue - )), + ) ) - ) - )), - "ShimDataBlock" / If(this.BlockSignature == 0xA0000008, Struct( - "LayerName" / CString("utf16") - )), - "SpecialFolderDataBlock" / If(this.BlockSignature == 0xA0000005, Struct( - "SpecialFolderID" / Int32ul, - "Offset" / Int32ul, - "LinkTargetIDList" / LinkTargetIDList, - )), - "TrackerDataBlock" / If(this.BlockSignature == 0xA0000003, Struct( - "Length" / Int32ul, - "Version" / Int32ul, - "MachineID" / Bytes(16), - "Droid" / Bytes(32), - "DroidBirth" / Bytes(32) - )), - "VistaAndAboveIDListDataBlock" / If(this.BlockSignature == 0xA000000C, Struct( - "ItemIDList" / GreedyRange(Struct( - "ItemIDSize" / Int16ul, - "Data" / Bytes(this.ItemIDSize - 2), - )), - "TerminalID" / Int16ul - )), + ), + ), + "ShimDataBlock" + / If(this.BlockSignature == 0xA0000008, Struct("LayerName" / CString("utf16"))), + "SpecialFolderDataBlock" + / If( + this.BlockSignature == 0xA0000005, + Struct( + "SpecialFolderID" / Int32ul, + "Offset" / Int32ul, + "LinkTargetIDList" / LinkTargetIDList, + ), + ), + "TrackerDataBlock" + / If( + this.BlockSignature == 0xA0000003, + Struct( + "Length" / Int32ul, + "Version" / Int32ul, + "MachineID" / Bytes(16), + "Droid" / Bytes(32), + "DroidBirth" / Bytes(32), + ), + ), + "VistaAndAboveIDListDataBlock" + / If( + this.BlockSignature == 0xA000000C, + Struct( + "ItemIDList" + / GreedyRange( + Struct( + "ItemIDSize" / Int16ul, + "Data" / Bytes(this.ItemIDSize - 2), + ) + ), + "TerminalID" / Int16ul, + ), + ), ) ShellLinkHeader = "ShellLinkHeader" / Struct( "HeaderSize" / Int32ul, "LinkCLSID" / Bytes(16), - "LinkFlags" / BitsSwapped(BitStruct( - "HasLinkTargetIDList" / Flag, - "HasLinkInfo" / Flag, - "HasName" / Flag, - "HasRelativePath" / Flag, - "HasWorkingDir" / Flag, - "HasArguments" / Flag, - "HasIconLocation" / Flag, - "IsUnicode" / Flag, - "ForceNoLinkInfo" / Flag, - "HasExpString" / Flag, - "RunInSeparateProcess" / Flag, - "Unused1" / Flag, - "HasDarwinID" / Flag, - "RunAsUser" / Flag, - "HasExpIcon" / Flag, - "NoPidlAlias" / Flag, - "Unused2" / Flag, - "RunWithShimLayer" / Flag, - "ForceNoLinkTrack" / Flag, - "EnableTargetMetadata" / Flag, - "DisableLinkPathTracking" / Flag, - "DisableKnownFolderTracking" / Flag, - "DisableKnownFolderAlias" / Flag, - "AllowLinkToLink" / Flag, - "UnaliasOnSave" / Flag, - "PreferEnvironmentPath" / Flag, - "KeepLocalIDListForUNCTarget" / Flag, - Flag, - Flag, - Flag, - Flag, - Flag - )), - "FileAttributes" / BitsSwapped(BitStruct( - "FILE_ATTRIBUTE_READONLY" / Flag, - "FILE_ATTRIBUTE_READONLY" / Flag, - "FILE_ATTRIBUTE_SYSTEM" / Flag, - "Reserved1" / Flag, - "FILE_ATTRIBUTE_DIRECTORY" / Flag, - "FILE_ATTRIBUTE_ARCHIVE" / Flag, - "Reserved2" / Flag, - "FILE_ATTRIBUTE_NORMAL" / Flag, - "FILE_ATTRIBUTE_TEMPORARY" / Flag, - "FILE_ATTRIBUTE_SPARSE_FILE" / Flag, - "FILE_ATTRIBUTE_REPARSE_POINT" / Flag, - "FILE_ATTRIBUTE_COMPRESSED" / Flag, - 
"FILE_ATTRIBUTE_OFFLINE" / Flag, - "FILE_ATTRIBUTE_NOT_CONTENT_INDEXED" / Flag, - "FILE_ATTRIBUTE_ENCRYPTED" / Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag - )), + "LinkFlags" + / BitsSwapped( + BitStruct( + "HasLinkTargetIDList" / Flag, + "HasLinkInfo" / Flag, + "HasName" / Flag, + "HasRelativePath" / Flag, + "HasWorkingDir" / Flag, + "HasArguments" / Flag, + "HasIconLocation" / Flag, + "IsUnicode" / Flag, + "ForceNoLinkInfo" / Flag, + "HasExpString" / Flag, + "RunInSeparateProcess" / Flag, + "Unused1" / Flag, + "HasDarwinID" / Flag, + "RunAsUser" / Flag, + "HasExpIcon" / Flag, + "NoPidlAlias" / Flag, + "Unused2" / Flag, + "RunWithShimLayer" / Flag, + "ForceNoLinkTrack" / Flag, + "EnableTargetMetadata" / Flag, + "DisableLinkPathTracking" / Flag, + "DisableKnownFolderTracking" / Flag, + "DisableKnownFolderAlias" / Flag, + "AllowLinkToLink" / Flag, + "UnaliasOnSave" / Flag, + "PreferEnvironmentPath" / Flag, + "KeepLocalIDListForUNCTarget" / Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + ) + ), + "FileAttributes" + / BitsSwapped( + BitStruct( + "FILE_ATTRIBUTE_READONLY" / Flag, + "FILE_ATTRIBUTE_READONLY" / Flag, + "FILE_ATTRIBUTE_SYSTEM" / Flag, + "Reserved1" / Flag, + "FILE_ATTRIBUTE_DIRECTORY" / Flag, + "FILE_ATTRIBUTE_ARCHIVE" / Flag, + "Reserved2" / Flag, + "FILE_ATTRIBUTE_NORMAL" / Flag, + "FILE_ATTRIBUTE_TEMPORARY" / Flag, + "FILE_ATTRIBUTE_SPARSE_FILE" / Flag, + "FILE_ATTRIBUTE_REPARSE_POINT" / Flag, + "FILE_ATTRIBUTE_COMPRESSED" / Flag, + "FILE_ATTRIBUTE_OFFLINE" / Flag, + "FILE_ATTRIBUTE_NOT_CONTENT_INDEXED" / Flag, + "FILE_ATTRIBUTE_ENCRYPTED" / Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + ) + ), "CreationTime" / Bytes(8), "AccessTime" / Bytes(8), "WriteTime" / Bytes(8), "FileSize" / Int32ul, "IconIndex" / Int32sl, - "ShowCommand" / Enum(Int32ul, - SW_HIDE=0x00000000, - SW_NORMAL=0x00000001, - SW_SHOWMINIMIZED=0x00000002, - SW_SHOWMAXIMIZED=0x00000003, - SW_SHOWNOACTIVATE=0x00000004, - SW_SHOW=0x00000005, - SW_MINIMIZE=0x00000006, - SW_SHOWMINNOACTIVE=0x00000007, - SW_SHOWNA=0x00000008, - SW_RESTORE=0x00000009, - SW_SHOWDEFAULT=0x0000000a, - ), - "HotKey" / Struct( - "LowByte" / Int8ul, - "HighByte" / Int8ul + "ShowCommand" + / Enum( + Int32ul, + SW_HIDE=0x00000000, + SW_NORMAL=0x00000001, + SW_SHOWMINIMIZED=0x00000002, + SW_SHOWMAXIMIZED=0x00000003, + SW_SHOWNOACTIVATE=0x00000004, + SW_SHOW=0x00000005, + SW_MINIMIZE=0x00000006, + SW_SHOWMINNOACTIVE=0x00000007, + SW_SHOWNA=0x00000008, + SW_RESTORE=0x00000009, + SW_SHOWDEFAULT=0x0000000A, ), + "HotKey" / Struct("LowByte" / Int8ul, "HighByte" / Int8ul), "Reserved1" / Bytes(2), "Reserved2" / Bytes(4), - "Reserved3" / Bytes(4) + "Reserved3" / Bytes(4), ) CommonNetworkRelativeLink = "CommonNetworkRelativeLink" / Struct( "CommonNetworkRelativeLinkSize" / Int32ul, - "CommonNetworkRelativeLinkFlags" / BitsSwapped(BitStruct( - "ValidDevice" / Flag, - "ValideNetType" / Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag - )), + "CommonNetworkRelativeLinkFlags" + / BitsSwapped( + BitStruct( + "ValidDevice" / Flag, + "ValideNetType" / Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + 
Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + ) + ), "NetNameOffset" / Int32ul, "DeviceNameOffset" / Int32ul, - "NetworkProviderType" / If(this.CommonNetworkRelativeLinkFlags.ValideNetType, Enum(Int32ul, - WNNC_NET_AVID=0x001A0000, - WNNC_NET_DOCUSPACE=0x001B0000, - WNNC_NET_MANGOSOFT=0x001C0000, - WNNC_NET_SERNET=0x001D0000, - WNNC_NET_RIVERFRONT1=0X001E0000, - WNNC_NET_RIVERFRONT2=0x001F0000, - WNNC_NET_DECORB=0x0020000, - WNNC_NET_PROTSTOR=0x00210000, - WNNC_NET_FJ_REDIR=0x00220000, - WNNC_NET_DISTINCT=0x00230000, - WNNC_NET_TWINS=0x00240000, - WNNC_NET_RDR2SAMPLE=0x00250000, - WNNC_NET_CSC=0x00260000, - WNNC_NET_3IN1=0x00270000, - WNNC_NET_EXTENDNET=0x00290000, - WNNC_NET_STAC=0x002A0000, - WNNC_NET_FOXBAT=0x002B0000, - WNNC_NET_YAHOO=0x002C0000, - WNNC_NET_EXIFS=0x002D0000, - WNNC_NET_DAV=0x002E0000, - WNNC_NET_KNOWARE=0x002F0000, - WNNC_NET_OBJECT_DIRE=0x00300000, - WNNC_NET_MASFAX=0x00310000, - WNNC_NET_HOB_NFS=0x00320000, - WNNC_NET_SHIVA=0x00330000, - WNNC_NET_IBMAL=0x00340000, - WNNC_NET_LOCK=0x00350000, - WNNC_NET_TERMSRV=0x00360000, - WNNC_NET_SRT=0x00370000, - WNNC_NET_QUINCY=0x00380000, - WNNC_NET_OPENAFS=0x00390000, - WNNC_NET_AVID1=0X003A0000, - WNNC_NET_DFS=0x003B0000, - WNNC_NET_KWNP=0x003C0000, - WNNC_NET_ZENWORKS=0x003D0000, - WNNC_NET_DRIVEONWEB=0x003E0000, - WNNC_NET_VMWARE=0x003F0000, - WNNC_NET_RSFX=0x00400000, - WNNC_NET_MFILES=0x00410000, - WNNC_NET_MS_NFS=0x00420000, - WNNC_NET_GOOGLE=0x00430000 - )), + "NetworkProviderType" + / If( + this.CommonNetworkRelativeLinkFlags.ValideNetType, + Enum( + Int32ul, + WNNC_NET_AVID=0x001A0000, + WNNC_NET_DOCUSPACE=0x001B0000, + WNNC_NET_MANGOSOFT=0x001C0000, + WNNC_NET_SERNET=0x001D0000, + WNNC_NET_RIVERFRONT1=0x001E0000, + WNNC_NET_RIVERFRONT2=0x001F0000, + WNNC_NET_DECORB=0x0020000, + WNNC_NET_PROTSTOR=0x00210000, + WNNC_NET_FJ_REDIR=0x00220000, + WNNC_NET_DISTINCT=0x00230000, + WNNC_NET_TWINS=0x00240000, + WNNC_NET_RDR2SAMPLE=0x00250000, + WNNC_NET_CSC=0x00260000, + WNNC_NET_3IN1=0x00270000, + WNNC_NET_EXTENDNET=0x00290000, + WNNC_NET_STAC=0x002A0000, + WNNC_NET_FOXBAT=0x002B0000, + WNNC_NET_YAHOO=0x002C0000, + WNNC_NET_EXIFS=0x002D0000, + WNNC_NET_DAV=0x002E0000, + WNNC_NET_KNOWARE=0x002F0000, + WNNC_NET_OBJECT_DIRE=0x00300000, + WNNC_NET_MASFAX=0x00310000, + WNNC_NET_HOB_NFS=0x00320000, + WNNC_NET_SHIVA=0x00330000, + WNNC_NET_IBMAL=0x00340000, + WNNC_NET_LOCK=0x00350000, + WNNC_NET_TERMSRV=0x00360000, + WNNC_NET_SRT=0x00370000, + WNNC_NET_QUINCY=0x00380000, + WNNC_NET_OPENAFS=0x00390000, + WNNC_NET_AVID1=0x003A0000, + WNNC_NET_DFS=0x003B0000, + WNNC_NET_KWNP=0x003C0000, + WNNC_NET_ZENWORKS=0x003D0000, + WNNC_NET_DRIVEONWEB=0x003E0000, + WNNC_NET_VMWARE=0x003F0000, + WNNC_NET_RSFX=0x00400000, + WNNC_NET_MFILES=0x00410000, + WNNC_NET_MS_NFS=0x00420000, + WNNC_NET_GOOGLE=0x00430000, + ), + ), If(this.NetNameOffset > 0x00000014, "NetNameOffsetUnicode" / Int32ul), If(this.NetNameOffset > 0x00000014, "DeviceNameOffsetUnicode" / Int32ul), "NetName" / CString("utf8"), @@ -406,66 +479,80 @@ LinkInfo = "LinkInfo" / Struct( "LinkInfoSize" / Int32ul, "LinkInfoHeaderSize" / Int32ul, - "LinkInfoFlags" / BitsSwapped(BitStruct( - "VolumeIDAndLocalBasePath" / Flag, - "CommonNetworkRelativeLinkAndPathSuffix" / Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag, - Flag - )), + 
"LinkInfoFlags" + / BitsSwapped( + BitStruct( + "VolumeIDAndLocalBasePath" / Flag, + "CommonNetworkRelativeLinkAndPathSuffix" / Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + Flag, + ) + ), "VolumeIDOffset" / Int32ul, "LocalBasePathOffset" / Int32ul, "CommonNetworkRelativeLinkOffset" / Int32ul, "CommonPathSuffixOffset" / Int32ul, "LocalBasePathOffsetUnicode" / If(this.LinkInfoHeaderSize >= 0x24, Int32ul), "CommonPathSuffixOffsetUnicode" / If(this.LinkInfoHeaderSize >= 0x24, Int32ul), - "VolumeID" / Struct( + "VolumeID" + / Struct( "VolumeIDSize" / Int32ul, - "DriveType" / Enum(Int32ul, - DRIVE_UNKNOWN=0x00000000, - DRIVE_NO_ROOT_DIR=0x00000001, - DRIVE_REMOVABLE=0x00000002, - DRIVE_FIXED=0x00000003, - DRIVE_REMOTE=0x00000004, - DRIVE_CDROM=0x00000005, - DRIVE_RAMDISK=0x00000006 - ), + "DriveType" + / Enum( + Int32ul, + DRIVE_UNKNOWN=0x00000000, + DRIVE_NO_ROOT_DIR=0x00000001, + DRIVE_REMOVABLE=0x00000002, + DRIVE_FIXED=0x00000003, + DRIVE_REMOTE=0x00000004, + DRIVE_CDROM=0x00000005, + DRIVE_RAMDISK=0x00000006, + ), "DriveSerialNumber" / Int32ul, "VolumeLabelOffset" / Int32ul, "VolumeLabelOffsetUnicode" / If(this.VolumeLabelOffset == 0x14, Int32ul), "Data" / CString("utf8"), ), "LocalBasePath" / If(this.LinkInfoFlags.VolumeIDAndLocalBasePath, CString("utf8")), - "CommonNetworkRelativeLink" / If(this.CommonNetworkRelativeLinkOffset, CommonNetworkRelativeLink), + "CommonNetworkRelativeLink" + / If(this.CommonNetworkRelativeLinkOffset, CommonNetworkRelativeLink), "CommonPathSuffix" / CString("utf8"), - "LocalBasePathUnicode" / If(this.LinkInfoHeaderSize == 0x24, If(this.LocalBasePathOffsetUnicode, CString("utf16"))), - "CommonPathSuffixUnicode" / If(this.LinkInfoHeaderSize == 0x24, - If(this.CommonPathSuffixOffsetUnicode, CString("utf16"))), -) \ No newline at end of file + "LocalBasePathUnicode" + / If( + this.LinkInfoHeaderSize == 0x24, + If(this.LocalBasePathOffsetUnicode, CString("utf16")), + ), + "CommonPathSuffixUnicode" + / If( + this.LinkInfoHeaderSize == 0x24, + If(this.CommonPathSuffixOffsetUnicode, CString("utf16")), + ), +) diff --git a/src/python/strelka/scanners/common/password_cracking.py b/src/python/strelka/scanners/common/password_cracking.py index 633fa441..c528e9cc 100644 --- a/src/python/strelka/scanners/common/password_cracking.py +++ b/src/python/strelka/scanners/common/password_cracking.py @@ -1,4 +1,3 @@ -import logging import os import re import subprocess @@ -152,7 +151,9 @@ def crack_john( hashes_per_second = 0.0 for statistic in re_statistics.finditer(stderr): - hashes_per_second = convert_unit_john(statistic.group("ccps").decode("utf-8")) + hashes_per_second = convert_unit_john( + statistic.group("ccps").decode("utf-8") + ) self.event["performance"] = { "keyspace": { diff --git a/src/python/strelka/scanners/scan_antiword.py b/src/python/strelka/scanners/scan_antiword.py index 50b55508..d0f9b856 100644 --- a/src/python/strelka/scanners/scan_antiword.py +++ b/src/python/strelka/scanners/scan_antiword.py @@ -11,20 +11,21 @@ class ScanAntiword(strelka.Scanner): tmp_directory: Location where tempfile writes temporary files. Defaults to '/tmp/'. 
""" + def scan(self, data, file, options, expire_at): - tmp_directory = options.get('tmp_directory', '/tmp/') + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: tmp_data.write(data) tmp_data.flush() (stdout, stderr) = subprocess.Popen( - ['antiword', tmp_data.name], + ["antiword", tmp_data.name], stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL + stderr=subprocess.DEVNULL, ).communicate() if stdout: # Send extracted file back to Strelka - self.emit_file(stdout, name='text') + self.emit_file(stdout, name="text") diff --git a/src/python/strelka/scanners/scan_base64.py b/src/python/strelka/scanners/scan_base64.py index 74c979ce..b2739430 100644 --- a/src/python/strelka/scanners/scan_base64.py +++ b/src/python/strelka/scanners/scan_base64.py @@ -5,6 +5,7 @@ class ScanBase64(strelka.Scanner): """Decodes base64-encoded file.""" + def scan(self, data, file, options, expire_at): decoded = base64.b64decode(data) diff --git a/src/python/strelka/scanners/scan_base64_pe.py b/src/python/strelka/scanners/scan_base64_pe.py index 3538d600..4c1393cd 100644 --- a/src/python/strelka/scanners/scan_base64_pe.py +++ b/src/python/strelka/scanners/scan_base64_pe.py @@ -7,17 +7,18 @@ class ScanBase64PE(strelka.Scanner): """Decodes base64-encoded file.""" + def scan(self, data, file, options, expire_at): with io.BytesIO(data) as encoded_file: - extract_data = b'' + extract_data = b"" try: extract_data = base64.b64decode(encoded_file.read()) - self.event['decoded_header'] = extract_data[:50] + self.event["decoded_header"] = extract_data[:50] except binascii.Error: - self.flags.append('not_decodable_from_base64') - + self.flags.append("not_decodable_from_base64") + if extract_data: # Send extracted file back to Strelka diff --git a/src/python/strelka/scanners/scan_batch.py b/src/python/strelka/scanners/scan_batch.py index 19e5d4c3..8e096104 100644 --- a/src/python/strelka/scanners/scan_batch.py +++ b/src/python/strelka/scanners/scan_batch.py @@ -1,6 +1,5 @@ import pygments -from pygments import formatters -from pygments import lexers +from pygments import formatters, lexers from strelka import strelka @@ -13,8 +12,9 @@ class ScanBatch(strelka.Scanner): Attributes: lexer: Pygments lexer ('batch') used to parse the file. 
""" + def init(self): - self.lexer = lexers.get_lexer_by_name('batch') + self.lexer = lexers.get_lexer_by_name("batch") def scan(self, data, file, options, expire_at): highlight = pygments.highlight( @@ -22,50 +22,50 @@ def scan(self, data, file, options, expire_at): self.lexer, formatters.RawTokenFormatter(), ) - highlight_list = highlight.split(b'\n') + highlight_list = highlight.split(b"\n") ordered_highlights = [] for hl in highlight_list: - split_highlight = hl.split(b'\t') + split_highlight = hl.split(b"\t") if len(split_highlight) == 2: token = split_highlight[0].decode() - value = split_highlight[1].decode().strip('\'"').strip() - highlight_entry = {'token': token, 'value': value} - if highlight_entry['value']: + value = split_highlight[1].decode().strip("'\"").strip() + highlight_entry = {"token": token, "value": value} + if highlight_entry["value"]: ordered_highlights.append(highlight_entry) - self.event.setdefault('tokens', []) - self.event.setdefault('comments', []) - self.event.setdefault('keywords', []) - self.event.setdefault('labels', []) - self.event.setdefault('strings', []) - self.event.setdefault('text', []) - self.event.setdefault('variables', []) + self.event.setdefault("tokens", []) + self.event.setdefault("comments", []) + self.event.setdefault("keywords", []) + self.event.setdefault("labels", []) + self.event.setdefault("strings", []) + self.event.setdefault("text", []) + self.event.setdefault("variables", []) position = 0 while position < len(ordered_highlights): ohlp = ordered_highlights[position] - if ohlp['token'] not in self.event['tokens']: - self.event['tokens'].append(ohlp['token']) - if ohlp['token'] == 'Token.Comment.Single': - if ohlp['value'] not in self.event['comments']: - self.event['comments'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Keyword': - if ohlp['value'] not in self.event['keywords']: - self.event['keywords'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Name.Label': - if ohlp['value'] not in self.event['labels']: - self.event['labels'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Literal.String.Double': - if ohlp['value'] not in self.event['strings']: - self.event['strings'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Literal.String.Single': - if ohlp['value'] not in self.event['strings']: - self.event['strings'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Text': - if ohlp['value'] not in self.event['text']: - self.event['text'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Name.Variable': - if ohlp['value'] not in self.event['variables']: - self.event['variables'].append(ohlp['value']) + if ohlp["token"] not in self.event["tokens"]: + self.event["tokens"].append(ohlp["token"]) + if ohlp["token"] == "Token.Comment.Single": + if ohlp["value"] not in self.event["comments"]: + self.event["comments"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Keyword": + if ohlp["value"] not in self.event["keywords"]: + self.event["keywords"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Name.Label": + if ohlp["value"] not in self.event["labels"]: + self.event["labels"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Literal.String.Double": + if ohlp["value"] not in self.event["strings"]: + self.event["strings"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Literal.String.Single": + if ohlp["value"] not in self.event["strings"]: + self.event["strings"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Text": + if ohlp["value"] not in self.event["text"]: + 
self.event["text"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Name.Variable": + if ohlp["value"] not in self.event["variables"]: + self.event["variables"].append(ohlp["value"]) position += 1 diff --git a/src/python/strelka/scanners/scan_bits.py b/src/python/strelka/scanners/scan_bits.py index 24cd90ee..6c02f7ed 100644 --- a/src/python/strelka/scanners/scan_bits.py +++ b/src/python/strelka/scanners/scan_bits.py @@ -1,47 +1,49 @@ +import datetime +import hashlib import os -import sys import string import struct -import hashlib -import datetime +import sys import traceback -from bits.structs import FILE, CONTROL, JOB - +from bits.structs import CONTROL, FILE, JOB from strelka.cstructs.bits.ese import ESENT_DB -from strelka import strelka +from strelka import strelka # XFER_HEADER defined as bytes -XFER_HEADER = b'\x36\xDA\x56\x77\x6F\x51\x5A\x43\xAC\xAC\x44\xA2\x48\xFF\xF3\x4D' +XFER_HEADER = b"\x36\xDA\x56\x77\x6F\x51\x5A\x43\xAC\xAC\x44\xA2\x48\xFF\xF3\x4D" # File and job delimiter constants for Windows 10 -WIN10_FILE_DELIMITER = b'\xE4\xCF\x9E\x51\x46\xD9\x97\x43\xB7\x3E\x26\x85\x13\x05\x1A\xB2' +WIN10_FILE_DELIMITER = ( + b"\xE4\xCF\x9E\x51\x46\xD9\x97\x43\xB7\x3E\x26\x85\x13\x05\x1A\xB2" +) WIN10_JOB_DELIMITERS = [ - b'\xA1\x56\x09\xE1\x43\xAF\xC9\x42\x92\xE6\x6F\x98\x56\xEB\xA7\xF6', - b'\x9F\x95\xD4\x4C\x64\x70\xF2\x4B\x84\xD7\x47\x6A\x7E\x62\x69\x9F', - b'\xF1\x19\x26\xA9\x32\x03\xBF\x4C\x94\x27\x89\x88\x18\x95\x88\x31', - b'\xC1\x33\xBC\xDD\xFB\x5A\xAF\x4D\xB8\xA1\x22\x68\xB3\x9D\x01\xAD', - b'\xd0\x57\x56\x8f\x2c\x01\x3e\x4e\xad\x2c\xf4\xa5\xd7\x65\x6f\xaf', - b'\x50\x67\x41\x94\x57\x03\x1d\x46\xa4\xcc\x5d\xd9\x99\x07\x06\xe4' + b"\xA1\x56\x09\xE1\x43\xAF\xC9\x42\x92\xE6\x6F\x98\x56\xEB\xA7\xF6", + b"\x9F\x95\xD4\x4C\x64\x70\xF2\x4B\x84\xD7\x47\x6A\x7E\x62\x69\x9F", + b"\xF1\x19\x26\xA9\x32\x03\xBF\x4C\x94\x27\x89\x88\x18\x95\x88\x31", + b"\xC1\x33\xBC\xDD\xFB\x5A\xAF\x4D\xB8\xA1\x22\x68\xB3\x9D\x01\xAD", + b"\xd0\x57\x56\x8f\x2c\x01\x3e\x4e\xad\x2c\xf4\xa5\xd7\x65\x6f\xaf", + b"\x50\x67\x41\x94\x57\x03\x1d\x46\xa4\xcc\x5d\xd9\x99\x07\x06\xe4", ] + class ScanBITS(strelka.Scanner): """Collects metadata and extracts files from Windows BITS files.""" def scan(self, data, file, options, expire_at): - self.event['jobs'] = [] + self.event["jobs"] = [] try: - self.event['jobs'] = self.process_file(data) + self.event["jobs"] = self.process_file(data) except strelka.ScannerTimeout: raise except Exception: self.flags.append("file_parsing_error") def process_file(self, file_data): - """ Processes the given BITS file. Attempts to find/parse jobs. """ + """Processes the given BITS file. 
Attempts to find/parse jobs.""" # Parse as a qmgr database (support old and Win10 formats) parsed_records = [] @@ -85,7 +87,7 @@ def parse_qmgr10_job(self, file_entries, job_data): try: # Following the JOB entry, there are usually XFER refs to FILE GUIDs - parsed_job['files'] = [] + parsed_job["files"] = [] xfer_parts = job_data.split(XFER_HEADER) file_ref_data = xfer_parts[1] num_file_refs = struct.unpack_from("= file_limit: @@ -235,7 +237,7 @@ def parse_file_modes(file_modes): continue # No DMG sample available has a file property of hidden - #if "hidden" in modes_list and "directory" in modes_list: + # if "hidden" in modes_list and "directory" in modes_list: # self.event["hidden_dirs"].append(match.group("name")) if "directory" not in modes_list: diff --git a/src/python/strelka/scanners/scan_docx.py b/src/python/strelka/scanners/scan_docx.py index fb79d27a..2f9441d8 100644 --- a/src/python/strelka/scanners/scan_docx.py +++ b/src/python/strelka/scanners/scan_docx.py @@ -1,9 +1,9 @@ import io -from bs4 import BeautifulSoup -from zlib import error -import docx import zipfile +import docx +from bs4 import BeautifulSoup + from strelka import strelka @@ -17,71 +17,79 @@ class ScanDocx(strelka.Scanner): """ def scan(self, data, file, options, expire_at): - extract_text = options.get('extract_text', False) + extract_text = options.get("extract_text", False) with io.BytesIO(data) as docx_io: try: docx_doc = docx.Document(docx_io) - self.event['author'] = docx_doc.core_properties.author - self.event['category'] = docx_doc.core_properties.category - self.event['comments'] = docx_doc.core_properties.comments - self.event['content_status'] = docx_doc.core_properties.content_status + self.event["author"] = docx_doc.core_properties.author + self.event["category"] = docx_doc.core_properties.category + self.event["comments"] = docx_doc.core_properties.comments + self.event["content_status"] = docx_doc.core_properties.content_status if docx_doc.core_properties.created is not None: - self.event['created'] = int(docx_doc.core_properties.created.strftime('%s')) - self.event['identifier'] = docx_doc.core_properties.identifier - self.event['keywords'] = docx_doc.core_properties.keywords - self.event['language'] = docx_doc.core_properties.language - self.event['last_modified_by'] = docx_doc.core_properties.last_modified_by + self.event["created"] = int( + docx_doc.core_properties.created.strftime("%s") + ) + self.event["identifier"] = docx_doc.core_properties.identifier + self.event["keywords"] = docx_doc.core_properties.keywords + self.event["language"] = docx_doc.core_properties.language + self.event[ + "last_modified_by" + ] = docx_doc.core_properties.last_modified_by if docx_doc.core_properties.last_printed is not None: - self.event['last_printed'] = int(docx_doc.core_properties.last_printed.strftime('%s')) + self.event["last_printed"] = int( + docx_doc.core_properties.last_printed.strftime("%s") + ) if docx_doc.core_properties.modified is not None: - self.event['modified'] = int(docx_doc.core_properties.modified.strftime('%s')) - self.event['revision'] = docx_doc.core_properties.revision - self.event['subject'] = docx_doc.core_properties.subject - self.event['title'] = docx_doc.core_properties.title - self.event['version'] = docx_doc.core_properties.version - self.event['font_colors'] = [''] - self.event['word_count'] = 0 - self.event['image_count'] = 0 + self.event["modified"] = int( + docx_doc.core_properties.modified.strftime("%s") + ) + self.event["revision"] = docx_doc.core_properties.revision + 
self.event["subject"] = docx_doc.core_properties.subject + self.event["title"] = docx_doc.core_properties.title + self.event["version"] = docx_doc.core_properties.version + self.event["font_colors"] = [""] + self.event["word_count"] = 0 + self.event["image_count"] = 0 for paragraph in docx_doc.paragraphs: - soup = BeautifulSoup(paragraph.paragraph_format.element.xml, 'xml') - color_list = soup.select('color') + soup = BeautifulSoup(paragraph.paragraph_format.element.xml, "xml") + color_list = soup.select("color") for color_xml in color_list: - color = color_xml.attrs['w:val'] - if not color in self.event['font_colors']: - self.event['font_colors'].append(color) + color = color_xml.attrs["w:val"] + if color not in self.event["font_colors"]: + self.event["font_colors"].append(color) - image_list = soup.select('pic') + image_list = soup.select("pic") for images in image_list: - if images.attrs['xmlns:pic']: - self.event['image_count'] += 1 + if images.attrs["xmlns:pic"]: + self.event["image_count"] += 1 - para_words = paragraph.text.split(' ') + para_words = paragraph.text.split(" ") - if '' not in para_words: - self.event['word_count'] += len(para_words) + if "" not in para_words: + self.event["word_count"] += len(para_words) - if 'FFFFFF' in self.event['font_colors']: - self.event['white_text_in_doc'] = True + if "FFFFFF" in self.event["font_colors"]: + self.event["white_text_in_doc"] = True if extract_text: - text = '' + text = "" for paragraph in docx_doc.paragraphs: - text += f'{paragraph.text}\n' + text += f"{paragraph.text}\n" # Send extracted file back to Strelka - self.emit_file(text.encode('utf-8'), name='text') + self.emit_file(text.encode("utf-8"), name="text") except ValueError: - self.flags.append('value_error') + self.flags.append("value_error") except zipfile.BadZipFile: - self.flags.append('bad_zip') + self.flags.append("bad_zip") except strelka.ScannerTimeout: raise except Exception: - self.flags.append('bad_doc') + self.flags.append("bad_doc") diff --git a/src/python/strelka/scanners/scan_elf.py b/src/python/strelka/scanners/scan_elf.py index d2038a6f..6a373312 100644 --- a/src/python/strelka/scanners/scan_elf.py +++ b/src/python/strelka/scanners/scan_elf.py @@ -8,43 +8,48 @@ class ScanElf(strelka.Scanner): """Collects metadata from ELF files.""" + def scan(self, data, file, options, expire_at): elf = ELF.parse(raw=list(data)) - self.event['total'] = { - 'libraries': len(elf.libraries), - 'relocations': len(elf.relocations), - 'sections': elf.header.numberof_sections, - 'segments': elf.header.numberof_segments, - 'symbols': len(elf.symbols), + self.event["total"] = { + "libraries": len(elf.libraries), + "relocations": len(elf.relocations), + "sections": elf.header.numberof_sections, + "segments": elf.header.numberof_segments, + "symbols": len(elf.symbols), } - self.event['nx'] = elf.has_nx - self.event['pie'] = elf.is_pie + self.event["nx"] = elf.has_nx + self.event["pie"] = elf.is_pie try: - self.event['header'] = { - 'endianness': str(elf.header.identity_data).split('.')[1], - 'entry_point': elf.header.entrypoint, - 'file': { - 'type': str(elf.header.file_type).split('.')[1], - 'version': str(elf.header.object_file_version).split('.')[1], + self.event["header"] = { + "endianness": str(elf.header.identity_data).split(".")[1], + "entry_point": elf.header.entrypoint, + "file": { + "type": str(elf.header.file_type).split(".")[1], + "version": str(elf.header.object_file_version).split(".")[1], }, - 'flags': { - 'arm': [str(f).split('.')[1] for f in elf.header.arm_flags_list], 
- 'hexagon': [str(f).split('.')[1] for f in elf.header.hexagon_flags_list], - 'mips': [str(f).split('.')[1] for f in elf.header.mips_flags_list], - 'ppc64': [str(f).split('.')[1] for f in elf.header.ppc64_flags_list], - 'processor': elf.header.processor_flag, + "flags": { + "arm": [str(f).split(".")[1] for f in elf.header.arm_flags_list], + "hexagon": [ + str(f).split(".")[1] for f in elf.header.hexagon_flags_list + ], + "mips": [str(f).split(".")[1] for f in elf.header.mips_flags_list], + "ppc64": [ + str(f).split(".")[1] for f in elf.header.ppc64_flags_list + ], + "processor": elf.header.processor_flag, }, - 'identity': { - 'class': str(elf.header.identity_class).split('.')[1], - 'data': str(elf.header.identity_data).split('.')[1], - 'os_abi': str(elf.header.identity_os_abi).split('.')[1], - 'version': str(elf.header.identity_version).split('.')[1], + "identity": { + "class": str(elf.header.identity_class).split(".")[1], + "data": str(elf.header.identity_data).split(".")[1], + "os_abi": str(elf.header.identity_os_abi).split(".")[1], + "version": str(elf.header.identity_version).split(".")[1], }, - 'machine': str(elf.header.machine_type).split('.')[1], - 'size': elf.header.header_size, + "machine": str(elf.header.machine_type).split(".")[1], + "size": elf.header.header_size, } except strelka.ScannerTimeout: raise @@ -52,96 +57,108 @@ def scan(self, data, file, options, expire_at): pass if elf.has_interpreter: - self.event['interpreter'] = elf.interpreter + self.event["interpreter"] = elf.interpreter - self.event.setdefault('relocations', []) - self.event['relocations'] = [] + self.event.setdefault("relocations", []) + self.event["relocations"] = [] for relo in elf.relocations: row = { - 'address': relo.address, - 'info': relo.info, - 'purpose': str(relo.purpose).split('.')[1], - 'size': relo.size, + "address": relo.address, + "info": relo.info, + "purpose": str(relo.purpose).split(".")[1], + "size": relo.size, } if relo.has_section: - row['section'] = relo.section.name + row["section"] = relo.section.name if relo.has_symbol: - row['symbol'] = relo.symbol.name + row["symbol"] = relo.symbol.name if elf.header.machine_type == ELF.ARCH.x86_64: - row['type'] = str(ELF.RELOCATION_X86_64(relo.type)).split('.')[1] + row["type"] = str(ELF.RELOCATION_X86_64(relo.type)).split(".")[1] elif elf.header.machine_type == ELF.ARCH.i386: - row['type'] = str(ELF.RELOCATION_i386(relo.type)).split('.')[1] + row["type"] = str(ELF.RELOCATION_i386(relo.type)).split(".")[1] elif elf.header.machine_type == ELF.ARCH.ARM: - row['type'] = str(ELF.RELOCATION_ARM(relo.type)).split('.')[1] + row["type"] = str(ELF.RELOCATION_ARM(relo.type)).split(".")[1] elif elf.header.machine_type == ELF.ARCH.AARCH64: - row['type'] = str(ELF.RELOCATION_AARCH64(relo.type)).split('.')[1] + row["type"] = str(ELF.RELOCATION_AARCH64(relo.type)).split(".")[1] else: - row['type'] = str(relo.type) + row["type"] = str(relo.type) - self.event['relocations'].append(row) + self.event["relocations"].append(row) - self.event['sections'] = [] + self.event["sections"] = [] try: for sec in elf.sections: - self.event['sections'].append({ - 'alignment': sec.alignment, - 'entropy': sec.entropy, - 'flags': [str(f).split('.')[1] for f in sec.flags_list], - 'name': sec.name, - 'offset': sec.offset, - 'size': sec.size, - 'type': str(sec.type).split('.')[1], - 'segments': [str(seg.type).split('.')[1] for seg in sec.segments], - }) + self.event["sections"].append( + { + "alignment": sec.alignment, + "entropy": sec.entropy, + "flags": [str(f).split(".")[1] for f 
in sec.flags_list], + "name": sec.name, + "offset": sec.offset, + "size": sec.size, + "type": str(sec.type).split(".")[1], + "segments": [ + str(seg.type).split(".")[1] for seg in sec.segments + ], + } + ) except strelka.ScannerTimeout: raise except Exception: pass - self.event['segments'] = [] + self.event["segments"] = [] try: for seg in elf.segments: - self.event['segments'].append({ - 'alignment': seg.alignment, - 'file_offset': seg.file_offset, - 'physical': { - 'address': seg.physical_address, - 'size': seg.physical_size, - }, - 'sections': [str(sec.name).split('.')[1] for sec in seg.sections], - 'type': str(seg.type).split('.')[1], - 'virtual': { - 'address': seg.virtual_address, - 'size': seg.virtual_size, - }, - }) + self.event["segments"].append( + { + "alignment": seg.alignment, + "file_offset": seg.file_offset, + "physical": { + "address": seg.physical_address, + "size": seg.physical_size, + }, + "sections": [ + str(sec.name).split(".")[1] for sec in seg.sections + ], + "type": str(seg.type).split(".")[1], + "virtual": { + "address": seg.virtual_address, + "size": seg.virtual_size, + }, + } + ) except strelka.ScannerTimeout: raise except Exception: pass - self.event['symbols'] = { - 'exported': [sym.name for sym in elf.exported_symbols], - 'imported': [sym.name for sym in elf.imported_symbols], - 'libraries': elf.libraries, - 'table': [], + self.event["symbols"] = { + "exported": [sym.name for sym in elf.exported_symbols], + "imported": [sym.name for sym in elf.imported_symbols], + "libraries": elf.libraries, + "table": [], } for sym in elf.symbols: - self.event['symbols']['table'].append({ - 'binding': str(sym.binding).rsplit('.')[1], - 'information': sym.information, - 'function': sym.is_function, - 'symbol': sym.name, - 'section_index': str(ELF.SYMBOL_SECTION_INDEX(sym.shndx)).rsplit('.')[1], - 'size': sym.size, - 'static': sym.is_static, - 'version': str(sym.symbol_version), - 'type': str(sym.type).rsplit('.')[1], - 'variable': sym.is_variable, - 'visibility': str(sym.visibility).rsplit('.')[1], - }) + self.event["symbols"]["table"].append( + { + "binding": str(sym.binding).rsplit(".")[1], + "information": sym.information, + "function": sym.is_function, + "symbol": sym.name, + "section_index": str(ELF.SYMBOL_SECTION_INDEX(sym.shndx)).rsplit( + "." + )[1], + "size": sym.size, + "static": sym.is_static, + "version": str(sym.symbol_version), + "type": str(sym.type).rsplit(".")[1], + "variable": sym.is_variable, + "visibility": str(sym.visibility).rsplit(".")[1], + } + ) diff --git a/src/python/strelka/scanners/scan_email.py b/src/python/strelka/scanners/scan_email.py index 3933934a..abebc89c 100644 --- a/src/python/strelka/scanners/scan_email.py +++ b/src/python/strelka/scanners/scan_email.py @@ -1,5 +1,6 @@ -import eml_parser import base64 + +import eml_parser import pytz from strelka import strelka @@ -10,125 +11,159 @@ class ScanEmail(strelka.Scanner): def scan(self, data, file, options, expire_at): attachments = [] - self.event['total'] = {'attachments': 0, 'extracted': 0} + self.event["total"] = {"attachments": 0, "extracted": 0} try: # Open and parse email byte string # If fail to open, return. 
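A standalone sketch of the parse step that follows, using the same `eml_parser` calls as the scanner (`sample.eml` is a hypothetical input file):

```python
# Parse sketch: mirrors the scanner's eml_parser usage.
import eml_parser

with open("sample.eml", "rb") as f:
    data = f.read()

ep = eml_parser.EmlParser(include_attachment_data=True, include_raw_body=True)
parsed_eml = ep.decode_email_bytes(data)
print(parsed_eml["header"]["subject"])
print(parsed_eml["header"].get("received_domain", []))
```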
try: - ep = eml_parser.EmlParser(include_attachment_data=True, include_raw_body=True) + ep = eml_parser.EmlParser( + include_attachment_data=True, include_raw_body=True + ) parsed_eml = ep.decode_email_bytes(data) except strelka.ScannerTimeout: raise except Exception: - self.flags.append('parse_load_error') + self.flags.append("parse_load_error") return # Check if email was parsed properly and attempt to deconflict and reload. # If fail to reparse, return. try: - if not (parsed_eml['header']['subject'] and parsed_eml['header']['header']): - if b'\nReceived: from ' in data: - data = (data.rpartition(b"\nReceived: from ")[1] + data.rpartition(b"\nReceived: from ")[ - 2])[1:] + if not ( + parsed_eml["header"]["subject"] and parsed_eml["header"]["header"] + ): + if b"\nReceived: from " in data: + data = ( + data.rpartition(b"\nReceived: from ")[1] + + data.rpartition(b"\nReceived: from ")[2] + )[1:] elif b"Start mail input; end with <CRLF>.<CRLF>\n" in data: - data = data.rpartition(b"Start mail input; end with <CRLF>.<CRLF>\n")[2] + data = data.rpartition( + b"Start mail input; end with <CRLF>.<CRLF>\n" + )[2] parsed_eml = ep.decode_email_bytes(data) - if not (parsed_eml['header']['subject'] and parsed_eml['header']['header']): - self.flags.append('parse_manual_email_error') + if not ( + parsed_eml["header"]["subject"] + and parsed_eml["header"]["header"] + ): + self.flags.append("parse_manual_email_error") return except strelka.ScannerTimeout: raise - except Exception as e: - self.flags.append('parse_manual_email_error') + except Exception: + self.flags.append("parse_manual_email_error") return # Body # If body exists in email, collect partial message contents and domains try: - if 'body' in parsed_eml: - for body in parsed_eml['body']: - if 'content_type' in body: - if body['content_type'] == 'text/plain': - if len(body['content']) <= 200: - self.event['body'] = body['content'] + if "body" in parsed_eml: + for body in parsed_eml["body"]: + if "content_type" in body: + if body["content_type"] == "text/plain": + if len(body["content"]) <= 200: + self.event["body"] = body["content"] else: - self.event['body'] = body['content'][:100] + '...' + body['content'][-100:] + self.event["body"] = ( + body["content"][:100] + + "..." + + body["content"][-100:] + ) else: - self.event['body'] = body['content'][:100] + '...' + body['content'][-100:] - if 'domain' in body: - if 'domain' in self.event: - self.event['domains'] += body['domain'] + self.event["body"] = ( + body["content"][:100] + "..." + body["content"][-100:] + ) + if "domain" in body: + if "domain" in self.event: + self.event["domains"] += body["domain"] else: - self.event['domains'] = body['domain'] + self.event["domains"] = body["domain"] except strelka.ScannerTimeout: raise except Exception: - self.flags.append('parse_body_error') + self.flags.append("parse_body_error") # Attachments # If attachments exist in email, collect attachment details and raw data to be resubmitted to pipeline.
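For the attachment walk below: `eml_parser` returns each attachment body base64-encoded, which is why the scanner decodes `attachment['raw']` before re-submitting it. A minimal sketch under the same assumptions (`sample.eml` is again hypothetical):

```python
# Attachment sketch: eml_parser base64-encodes bodies, so decode before reuse.
import base64

import eml_parser

with open("sample.eml", "rb") as f:
    parsed_eml = eml_parser.EmlParser(
        include_attachment_data=True
    ).decode_email_bytes(f.read())

for attachment in parsed_eml.get("attachment", []):
    payload = base64.b64decode(attachment["raw"])
    print(attachment["filename"], attachment["hash"]["md5"], len(payload))
```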
try: - if 'attachment' in parsed_eml: - self.event['attachments'] = {} - self.event['attachments']['filenames'] = [] - self.event['attachments']['hashes'] = [] - self.event['attachments']['totalsize'] = 0 - for attachment in parsed_eml['attachment']: - self.event['attachments']['filenames'].append(attachment["filename"]) - self.event['attachments']['hashes'].append(attachment['hash']['md5']) - self.event['attachments']['totalsize'] += attachment['size'] - attachments.append({ - 'name': attachment['filename'], - 'content-type': attachment['content_header']['content-type'][0], - 'raw': base64.b64decode(attachment['raw']) - } + if "attachment" in parsed_eml: + self.event["attachments"] = {} + self.event["attachments"]["filenames"] = [] + self.event["attachments"]["hashes"] = [] + self.event["attachments"]["totalsize"] = 0 + for attachment in parsed_eml["attachment"]: + self.event["attachments"]["filenames"].append( + attachment["filename"] + ) + self.event["attachments"]["hashes"].append( + attachment["hash"]["md5"] + ) + self.event["attachments"]["totalsize"] += attachment["size"] + attachments.append( + { + "name": attachment["filename"], + "content-type": attachment["content_header"][ + "content-type" + ][0], + "raw": base64.b64decode(attachment["raw"]), + } ) except strelka.ScannerTimeout: raise except Exception: - self.flags.append('parse_attachment_error') + self.flags.append("parse_attachment_error") # Header # Collect email header information try: - self.event['subject'] = parsed_eml['header']['subject'] - self.event['to'] = parsed_eml['header']['to'] - self.event['from'] = parsed_eml['header']['from'] - self.event['date_utc'] = parsed_eml['header']['date'].astimezone(pytz.utc).isoformat()[:-6] + '.000Z' - self.event['message_id'] = str(parsed_eml['header']['header']['message-id'][0][1:-1]) - if 'received_domain' in parsed_eml['header']: - self.event['received_domain'] = parsed_eml['header']['received_domain'] - if 'received_ip' in parsed_eml['header']: - self.event['received_ip'] = parsed_eml['header']['received_ip'] + self.event["subject"] = parsed_eml["header"]["subject"] + self.event["to"] = parsed_eml["header"]["to"] + self.event["from"] = parsed_eml["header"]["from"] + self.event["date_utc"] = ( + parsed_eml["header"]["date"].astimezone(pytz.utc).isoformat()[:-6] + + ".000Z" + ) + self.event["message_id"] = str( + parsed_eml["header"]["header"]["message-id"][0][1:-1] + ) + if "received_domain" in parsed_eml["header"]: + self.event["received_domain"] = parsed_eml["header"][ + "received_domain" + ] + if "received_ip" in parsed_eml["header"]: + self.event["received_ip"] = parsed_eml["header"]["received_ip"] except strelka.ScannerTimeout: raise except Exception: - self.flags.append('parse_header_error') + self.flags.append("parse_header_error") # If attachments were found, submit back into pipeline try: if attachments: for attachment in attachments: - self.event['total']['attachments'] += 1 + self.event["total"]["attachments"] += 1 - name = attachment['name'] + name = attachment["name"] try: - flavors = [attachment['content-type'].encode("utf-8").partition(b";")[0]] + flavors = [ + attachment["content-type"] + .encode("utf-8") + .partition(b";")[0] + ] except Exception: flavors = [] - self.flags.append('content_type_error') + self.flags.append("content_type_error") # Send extracted file back to Strelka - self.emit_file(attachment['raw'], name=name, flavors=flavors) + self.emit_file(attachment["raw"], name=name, flavors=flavors) - self.event['total']['extracted'] += 1 + 
self.event["total"]["extracted"] += 1 except strelka.ScannerTimeout: raise - except Exception as e: - raise - self.flags.append('extract_attachment_error') + except Exception: + self.flags.append("extract_attachment_error") except AssertionError: - self.flags.append('assertion_error') + self.flags.append("assertion_error") diff --git a/src/python/strelka/scanners/scan_encrypted_doc.py b/src/python/strelka/scanners/scan_encrypted_doc.py index 863288fc..eb41a49f 100644 --- a/src/python/strelka/scanners/scan_encrypted_doc.py +++ b/src/python/strelka/scanners/scan_encrypted_doc.py @@ -1,9 +1,10 @@ import io import os -import msoffcrypto import subprocess import tempfile +import msoffcrypto + from strelka import strelka @@ -13,7 +14,7 @@ def crack_word( jtr_path, tmp_dir, password_file, - min_length=1, + min_length=1, max_length=10, scanner_timeout=150, brute=False, @@ -81,7 +82,7 @@ def crack_word( ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, - ).communicate(timeout=scanner_timeout+5) + ).communicate(timeout=scanner_timeout + 5) if b"oldoffice" in stdout.split(b"\n")[0]: if stdout.split(b"\n")[2]: diff --git a/src/python/strelka/scanners/scan_encrypted_zip.py b/src/python/strelka/scanners/scan_encrypted_zip.py index 422ff868..03f1742a 100644 --- a/src/python/strelka/scanners/scan_encrypted_zip.py +++ b/src/python/strelka/scanners/scan_encrypted_zip.py @@ -1,10 +1,11 @@ -import subprocess -import tempfile import io import os -import pyzipper +import subprocess +import tempfile import zlib +import pyzipper + from strelka import strelka @@ -176,11 +177,13 @@ def scan(self, data, file, options, expire_at): if extract_data: # Send extracted file back to Strelka - self.emit_file(extract_data, name=file_item.filename) + self.emit_file( + extract_data, name=file_item.filename + ) self.event["total"]["extracted"] += 1 - except NotImplementedError as e: + except NotImplementedError: self.flags.append("unsupported_compression") except RuntimeError: self.flags.append("runtime_error") diff --git a/src/python/strelka/scanners/scan_entropy.py b/src/python/strelka/scanners/scan_entropy.py index d745dcfe..4f64c4dc 100644 --- a/src/python/strelka/scanners/scan_entropy.py +++ b/src/python/strelka/scanners/scan_entropy.py @@ -5,5 +5,6 @@ class ScanEntropy(strelka.Scanner): """Calculates entropy of files.""" + def scan(self, data, file, options, expire_at): - self.event['entropy'] = entropy.shannon_entropy(data) + self.event["entropy"] = entropy.shannon_entropy(data) diff --git a/src/python/strelka/scanners/scan_exception.py b/src/python/strelka/scanners/scan_exception.py index 432689d0..56b3e867 100644 --- a/src/python/strelka/scanners/scan_exception.py +++ b/src/python/strelka/scanners/scan_exception.py @@ -18,6 +18,5 @@ def init(self): pass def scan(self, data, file, options, expire_at): - limit = options.get("limit", 0) raise Exception("Scanner Exception") diff --git a/src/python/strelka/scanners/scan_exiftool.py b/src/python/strelka/scanners/scan_exiftool.py index ffdff664..786eb33b 100644 --- a/src/python/strelka/scanners/scan_exiftool.py +++ b/src/python/strelka/scanners/scan_exiftool.py @@ -15,16 +15,17 @@ class ScanExiftool(strelka.Scanner): tmp_directory: Location where tempfile writes temporary files. Defaults to '/tmp/'. 
""" + def scan(self, data, file, options, expire_at): - keys = options.get('keys', []) - tmp_directory = options.get('tmp_directory', '/tmp/') + keys = options.get("keys", []) + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: tmp_data.write(data) tmp_data.flush() (stdout, stderr) = subprocess.Popen( - ['exiftool', '-d', '"%s"', '-j', tmp_data.name], + ["exiftool", "-d", '"%s"', "-j", tmp_data.name], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, ).communicate() @@ -32,21 +33,23 @@ def scan(self, data, file, options, expire_at): if stdout: exiftool_dictionary = json.loads(stdout)[0] - self.event['keys'] = [] + self.event["keys"] = [] for k, v in exiftool_dictionary.items(): if keys and k not in keys: continue if isinstance(v, str): v = v.strip() - v = v.strip('\'"') + v = v.strip("'\"") try: v = ast.literal_eval(v) except (ValueError, SyntaxError): pass - self.event['keys'].append({ - 'key': k, - 'value': v, - }) + self.event["keys"].append( + { + "key": k, + "value": v, + } + ) diff --git a/src/python/strelka/scanners/scan_falcon_sandbox.py b/src/python/strelka/scanners/scan_falcon_sandbox.py index 44d6017a..0806ea83 100644 --- a/src/python/strelka/scanners/scan_falcon_sandbox.py +++ b/src/python/strelka/scanners/scan_falcon_sandbox.py @@ -1,7 +1,8 @@ import os + import requests -from requests.auth import HTTPBasicAuth import urllib3 +from requests.auth import HTTPBasicAuth from strelka import strelka @@ -36,23 +37,21 @@ class ScanFalconSandbox(strelka.Scanner): 100: ‘Windows 7 32 bit’ Defaults to [100] """ + def init(self): self.api_key = None self.api_secret = None - self.server = '' + self.server = "" self.auth_check = False self.depth = 0 self.env_id = [100] def submit_file(self, file, env_id): - url = self.server + '/api/submit' + url = self.server + "/api/submit" # TODO data is never referenced so this will crash - files = {'file': data} + files = {"file": None} # data - data = { - 'nosharevt': 1, - 'environmentId': env_id, - 'allowCommunityAccess': 1} + data = {"nosharevt": 1, "environmentId": env_id, "allowCommunityAccess": 1} try: response = requests.post( @@ -62,40 +61,46 @@ def submit_file(self, file, env_id): verify=False, files=files, timeout=self.timeout, - headers={'User-Agent': 'VxApi CLI Connector'}, + headers={"User-Agent": "VxApi CLI Connector"}, auth=(HTTPBasicAuth(self.api_key, self.api_secret)), ) - if response.status_code == 200 and response.json()['response_code'] == 0: - sha256 = response.json()['response']['sha256'] # Successfully submitted file - self.event['sha256'] = sha256 + if response.status_code == 200 and response.json()["response_code"] == 0: + sha256 = response.json()["response"][ + "sha256" + ] # Successfully submitted file + self.event["sha256"] = sha256 - elif response.status_code == 200 and response.json()['response_code'] == -1: - self.flags.append('duplicate_submission') # Submission Failed - duplicate + elif response.status_code == 200 and response.json()["response_code"] == -1: + self.flags.append( + "duplicate_submission" + ) # Submission Failed - duplicate else: - self.flags.append('upload_failed') # Upload Failed + self.flags.append("upload_failed") # Upload Failed except requests.exceptions.ConnectTimeout: - self.flags.append('connect_timeout') + self.flags.append("connect_timeout") return def scan(self, data, file, options, expire_at): - self.depth = options.get('depth', 0) + self.depth = options.get("depth", 0) if file.depth > self.depth: - 
self.flags.append('file_depth_exceeded') + self.flags.append("file_depth_exceeded") return - self.server = options.get('server', '') - self.priority = options.get('priority', 3) - self.timeout = options.get('timeout', 60) - self.env_id = options.get('env_id', [100]) + self.server = options.get("server", "") + self.priority = options.get("priority", 3) + self.timeout = options.get("timeout", 60) + self.env_id = options.get("env_id", [100]) if not self.auth_check: - self.api_key = options.get('api_key', None) or os.environ.get('FS_API_KEY') - self.api_secret = options.get('api_secret', None) or os.environ.get('FS_API_SECKEY') + self.api_key = options.get("api_key", None) or os.environ.get("FS_API_KEY") + self.api_secret = options.get("api_secret", None) or os.environ.get( + "FS_API_SECKEY" + ) self.auth_check = True # Allow submission to multiple environments (e.g. 32-bit and 64-bit) diff --git a/src/python/strelka/scanners/scan_floss.py b/src/python/strelka/scanners/scan_floss.py index 6b73366d..d2743b5a 100644 --- a/src/python/strelka/scanners/scan_floss.py +++ b/src/python/strelka/scanners/scan_floss.py @@ -16,11 +16,11 @@ class ScanFloss(strelka.Scanner): """ def scan(self, data, file, options, expire_at): - tmp_directory = options.get('tmp_directory', '/tmp/') - limit = options.get('limit', 100) + tmp_directory = options.get("tmp_directory", "/tmp/") + limit = options.get("limit", 100) - self.event['decoded'] = [] - self.event['stack'] = [] + self.event["decoded"] = [] + self.event["stack"] = [] try: with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: @@ -32,34 +32,43 @@ def scan(self, data, file, options, expire_at): with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_output: try: subprocess.Popen( - ['/tmp/floss', '-q', '--no-static-strings', '-o', tmp_output.name, tmp_data.name], + [ + "/tmp/floss", + "-q", + "--no-static-strings", + "-o", + tmp_output.name, + tmp_data.name, + ], stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL + stderr=subprocess.DEVNULL, ).communicate() floss_json = json.load(tmp_output) except strelka.ScannerTimeout: raise except Exception: - self.flags.append('error_processing') + self.flags.append("error_processing") return try: - if floss_json['strings']['decoded_strings']: - self.event['decoded'] = floss_json['strings']['decoded_strings'][:limit] - if floss_json['strings']['stack_strings']: - self.event['stack'] = floss_json['strings']['stack_strings'][:limit] + if floss_json["strings"]["decoded_strings"]: + self.event["decoded"] = floss_json["strings"][ + "decoded_strings" + ][:limit] + if floss_json["strings"]["stack_strings"]: + self.event["stack"] = floss_json["strings"][ + "stack_strings" + ][:limit] except strelka.ScannerTimeout: raise except Exception: - self.flags.append('error_parsing') + self.flags.append("error_parsing") return except strelka.ScannerTimeout: raise except Exception: - self.flags.append('error_execution') + self.flags.append("error_execution") except strelka.ScannerTimeout: raise except Exception: - self.flags.append('error_execution') - - + self.flags.append("error_execution") diff --git a/src/python/strelka/scanners/scan_footer.py b/src/python/strelka/scanners/scan_footer.py index 9b269510..f0f5c551 100644 --- a/src/python/strelka/scanners/scan_footer.py +++ b/src/python/strelka/scanners/scan_footer.py @@ -11,15 +11,16 @@ class ScanFooter(strelka.Scanner): Defaults to 50. 
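The four footer encodings named above differ only in presentation; a short sketch of each against the same trailing bytes (sample data invented):

```python
# The same 10 trailing bytes rendered in each supported encoding.
import binascii

data = b"\x00\x01PK\x05\x06 end\xff"
length = 10

print(data[-length:])                    # 'classic' and 'raw': raw bytes
print(binascii.hexlify(data[-length:]))  # 'hex'
print(str(data[-length:])[2:-1])         # 'backslash': repr with b'' stripped
```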
encodings: List of which fields/encodings should be emitted, one of classic, raw, hex, backslash """ + def scan(self, data, file, options, expire_at): - length = options.get('length', 50) - encodings = options.get('encodings', ['classic']) + length = options.get("length", 50) + encodings = options.get("encodings", ["classic"]) - if 'classic' in encodings: - self.event['footer'] = data[-length:] - if 'raw' in encodings: - self.event['raw'] = data[-length:] - if 'hex' in encodings: - self.event['hex'] = binascii.hexlify(data[-length:]) - if 'backslash' in encodings: - self.event['backslash'] = str(data[-length:])[2:-1] + if "classic" in encodings: + self.event["footer"] = data[-length:] + if "raw" in encodings: + self.event["raw"] = data[-length:] + if "hex" in encodings: + self.event["hex"] = binascii.hexlify(data[-length:]) + if "backslash" in encodings: + self.event["backslash"] = str(data[-length:])[2:-1] diff --git a/src/python/strelka/scanners/scan_gif.py b/src/python/strelka/scanners/scan_gif.py index 4e7a5778..04ba7a9b 100644 --- a/src/python/strelka/scanners/scan_gif.py +++ b/src/python/strelka/scanners/scan_gif.py @@ -6,15 +6,16 @@ class ScanGif(strelka.Scanner): This scanner extracts data that is inserted past the GIF trailer. """ + def scan(self, data, file, options, expire_at): - if not data.endswith(b'\x00\x3b'): - trailer_index = data.rfind(b'\x00\x3b') + if not data.endswith(b"\x00\x3b"): + trailer_index = data.rfind(b"\x00\x3b") if trailer_index == -1: - self.flags.append('no_trailer') + self.flags.append("no_trailer") else: - trailer_data = data[trailer_index + 2:] + trailer_data = data[trailer_index + 2 :] if trailer_data: - self.event['trailer_index'] = trailer_index + self.event["trailer_index"] = trailer_index # Send extracted file back to Strelka self.emit_file(trailer_data) diff --git a/src/python/strelka/scanners/scan_gzip.py b/src/python/strelka/scanners/scan_gzip.py index 612ca378..f9d2ef10 100644 --- a/src/python/strelka/scanners/scan_gzip.py +++ b/src/python/strelka/scanners/scan_gzip.py @@ -6,11 +6,12 @@ class ScanGzip(strelka.Scanner): """Decompresses gzip files.""" + def scan(self, data, file, options, expire_at): with io.BytesIO(data) as gzip_io: with gzip.GzipFile(fileobj=gzip_io) as gzip_obj: decompressed = gzip_obj.read() - self.event['size'] = len(decompressed) + self.event["size"] = len(decompressed) # Send extracted file back to Strelka self.emit_file(decompressed, name=file.name) diff --git a/src/python/strelka/scanners/scan_hash.py b/src/python/strelka/scanners/scan_hash.py index 1d1ede30..b3ec6f2b 100644 --- a/src/python/strelka/scanners/scan_hash.py +++ b/src/python/strelka/scanners/scan_hash.py @@ -1,14 +1,17 @@ from hashlib import md5, sha1, sha256 + from ssdeep import hash as ssdeep_hash from tlsh import hash as tlsh_hash + from strelka import strelka class ScanHash(strelka.Scanner): """Calculates file hash values.""" + def scan(self, data, file, options, expire_at): - self.event['md5'] = md5(data).hexdigest() - self.event['sha1'] = sha1(data).hexdigest() - self.event['sha256'] = sha256(data).hexdigest() - self.event['ssdeep'] = ssdeep_hash(data) - self.event['tlsh'] = tlsh_hash(data) + self.event["md5"] = md5(data).hexdigest() + self.event["sha1"] = sha1(data).hexdigest() + self.event["sha256"] = sha256(data).hexdigest() + self.event["ssdeep"] = ssdeep_hash(data) + self.event["tlsh"] = tlsh_hash(data) diff --git a/src/python/strelka/scanners/scan_header.py b/src/python/strelka/scanners/scan_header.py index c92a6148..4604e414 100644 --- 
a/src/python/strelka/scanners/scan_header.py +++ b/src/python/strelka/scanners/scan_header.py @@ -11,15 +11,16 @@ class ScanHeader(strelka.Scanner): Defaults to 50. encodings: List of which fields/encodings should be emitted, one of classic, raw, hex, backslash """ + def scan(self, data, file, options, expire_at): - length = options.get('length', 50) - encodings = options.get('encodings', ['classic']) + length = options.get("length", 50) + encodings = options.get("encodings", ["classic"]) - if 'classic' in encodings: - self.event['header'] = data[:length] - if 'raw' in encodings: - self.event['raw'] = data[:length] - if 'hex' in encodings: - self.event['hex'] = binascii.hexlify(data[:length]) - if 'backslash' in encodings: - self.event['backslash'] = str(data[:length])[2:-1] + if "classic" in encodings: + self.event["header"] = data[:length] + if "raw" in encodings: + self.event["raw"] = data[:length] + if "hex" in encodings: + self.event["hex"] = binascii.hexlify(data[:length]) + if "backslash" in encodings: + self.event["backslash"] = str(data[:length])[2:-1] diff --git a/src/python/strelka/scanners/scan_html.py b/src/python/strelka/scanners/scan_html.py index 112f0331..6653e852 100644 --- a/src/python/strelka/scanners/scan_html.py +++ b/src/python/strelka/scanners/scan_html.py @@ -10,115 +10,120 @@ class ScanHtml(strelka.Scanner): parser: Sets the HTML parser used during scanning. Defaults to 'html.parser'. """ + def scan(self, data, file, options, expire_at): - parser = options.get('parser', 'html.parser') - max_hyperlinks = options.get('max_hyperlinks', 50) - - self.event['total'] = { - 'scripts': 0, - 'forms': 0, - 'inputs': 0, - 'frames': 0, - 'extracted': 0, + parser = options.get("parser", "html.parser") + max_hyperlinks = options.get("max_hyperlinks", 50) + + self.event["total"] = { + "scripts": 0, + "forms": 0, + "inputs": 0, + "frames": 0, + "extracted": 0, } try: soup = bs4.BeautifulSoup(data, parser) if soup.title: - self.event['title'] = soup.title.text + self.event["title"] = soup.title.text hyperlinks = [] - hyperlinks.extend(soup.find_all('a', href=True)) - hyperlinks.extend(soup.find_all('img', src=True)) - self.event.setdefault('hyperlinks', []) + hyperlinks.extend(soup.find_all("a", href=True)) + hyperlinks.extend(soup.find_all("img", src=True)) + self.event.setdefault("hyperlinks", []) for hyperlink in hyperlinks: - link = hyperlink.get('href') or hyperlink.get('src') + link = hyperlink.get("href") or hyperlink.get("src") - if link and link.startswith('data:') and ';base64,' in link: - hyperlink_data = link.split(';base64,')[1] - self.emit_file(hyperlink_data.encode(), name=f'base64_hyperlink') + if link and link.startswith("data:") and ";base64," in link: + hyperlink_data = link.split(";base64,")[1] + self.emit_file(hyperlink_data.encode(), name="base64_hyperlink") else: - if link not in self.event['hyperlinks']: - self.event['hyperlinks'].append(link) + if link not in self.event["hyperlinks"]: + self.event["hyperlinks"].append(link) # Gather count of links and reduce potential link duplicates and restrict amount of # links returned using the configurable max_hyperlinks. 
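One detail worth seeing in isolation from the hyperlink handling above: for `data:` URLs the scanner re-emits the base64 text itself (re-encoded to bytes), not the decoded payload. A minimal sketch with an invented anchor tag:

```python
# Data-URI sketch: the base64 text after ';base64,' is what gets re-emitted.
import bs4

html = '<a href="data:text/plain;base64,aGVsbG8=">link</a>'  # invented sample
soup = bs4.BeautifulSoup(html, "html.parser")
for anchor in soup.find_all("a", href=True):
    link = anchor.get("href")
    if link and link.startswith("data:") and ";base64," in link:
        print(link.split(";base64,")[1].encode())  # b'aGVsbG8='
```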
- if self.event['hyperlinks']: - self.event['hyperlinks_count'] = len(self.event['hyperlinks']) - self.event['hyperlinks'] = self.event['hyperlinks'][:max_hyperlinks] + if self.event["hyperlinks"]: + self.event["hyperlinks_count"] = len(self.event["hyperlinks"]) + self.event["hyperlinks"] = self.event["hyperlinks"][:max_hyperlinks] - forms = soup.find_all('form') - self.event['total']['forms'] = len(forms) - self.event.setdefault('forms', []) + forms = soup.find_all("form") + self.event["total"]["forms"] = len(forms) + self.event.setdefault("forms", []) for form in forms: form_entry = { - 'action': form.get('action'), - 'method': form.get('method'), + "action": form.get("action"), + "method": form.get("method"), } - if form_entry not in self.event['forms']: - self.event['forms'].append(form_entry) + if form_entry not in self.event["forms"]: + self.event["forms"].append(form_entry) frames = [] - frames.extend(soup.find_all('frame')) - frames.extend(soup.find_all('iframe')) - self.event['total']['frames'] = len(frames) - self.event.setdefault('frames', []) + frames.extend(soup.find_all("frame")) + frames.extend(soup.find_all("iframe")) + self.event["total"]["frames"] = len(frames) + self.event.setdefault("frames", []) for frame in frames: frame_entry = { - 'src': frame.get('src'), - 'name': frame.get('name'), - 'height': frame.get('height'), - 'width': frame.get('width'), - 'border': frame.get('border'), - 'id': frame.get('id'), - 'style': frame.get('style'), + "src": frame.get("src"), + "name": frame.get("name"), + "height": frame.get("height"), + "width": frame.get("width"), + "border": frame.get("border"), + "id": frame.get("id"), + "style": frame.get("style"), } - if frame_entry not in self.event['frames']: - self.event['frames'].append(frame_entry) + if frame_entry not in self.event["frames"]: + self.event["frames"].append(frame_entry) - inputs = soup.find_all('input') - self.event['total']['inputs'] = len(inputs) - self.event.setdefault('inputs', []) + inputs = soup.find_all("input") + self.event["total"]["inputs"] = len(inputs) + self.event.setdefault("inputs", []) for html_input in inputs: input_entry = { - 'type': html_input.get('type'), - 'name': html_input.get('name'), - 'value': html_input.get('value'), + "type": html_input.get("type"), + "name": html_input.get("name"), + "value": html_input.get("value"), } - if input_entry not in self.event['inputs']: - self.event['inputs'].append(input_entry) + if input_entry not in self.event["inputs"]: + self.event["inputs"].append(input_entry) - scripts = soup.find_all('script') - self.event['total']['scripts'] = len(scripts) - self.event.setdefault('scripts', []) + scripts = soup.find_all("script") + self.event["total"]["scripts"] = len(scripts) + self.event.setdefault("scripts", []) for (index, script) in enumerate(scripts): script_flavors = [ - script.get('language', '').lower(), - script.get('type', '').lower(), + script.get("language", "").lower(), + script.get("type", "").lower(), ] script_entry = { - 'src': script.get('src'), - 'language': script.get('language'), - 'type': script.get('type'), + "src": script.get("src"), + "language": script.get("language"), + "type": script.get("type"), } - if script_entry not in self.event['scripts']: - self.event['scripts'].append(script_entry) + if script_entry not in self.event["scripts"]: + self.event["scripts"].append(script_entry) if script.text: - self.emit_file(script.text.encode(), name=f'script_{index}', flavors=script_flavors) - self.event['total']['extracted'] += 1 - - spans = 
soup.find_all('span') - self.event['total']['spans'] = len(spans) - self.event.setdefault('spans', []) + self.emit_file( + script.text.encode(), + name=f"script_{index}", + flavors=script_flavors, + ) + self.event["total"]["extracted"] += 1 + + spans = soup.find_all("span") + self.event["total"]["spans"] = len(spans) + self.event.setdefault("spans", []) for span in spans: span_entry = { - 'class': span.get('class'), - 'style': span.get('style'), + "class": span.get("class"), + "style": span.get("style"), } - if span_entry not in self.event['spans']: - self.event['spans'].append(span_entry) + if span_entry not in self.event["spans"]: + self.event["spans"].append(span_entry) except TypeError: - self.flags.append('type_error') + self.flags.append("type_error") diff --git a/src/python/strelka/scanners/scan_ini.py b/src/python/strelka/scanners/scan_ini.py index ff95078b..ebc115c2 100644 --- a/src/python/strelka/scanners/scan_ini.py +++ b/src/python/strelka/scanners/scan_ini.py @@ -3,35 +3,38 @@ class ScanIni(strelka.Scanner): """Parses keys from INI files.""" - def scan(self, data, file, options, expire_at): - self.event['comments'] = [] - self.event['keys'] = [] - self.event['sections'] = [] - + def scan(self, data, file, options, expire_at): + self.event["comments"] = [] + self.event["keys"] = [] + self.event["sections"] = [] - section = '' + section = "" ini = data.splitlines() for key in ini: key = key.strip() if not key: continue - if key.startswith(b'[') and key.endswith(b']'): + if key.startswith(b"[") and key.endswith(b"]"): section = key[1:-1] - self.event['sections'].append(section) - elif key.startswith(b'#') or key.startswith(b';'): - self.event['comments'].append(key) + self.event["sections"].append(section) + elif key.startswith(b"#") or key.startswith(b";"): + self.event["comments"].append(key) else: - split_key = key.split(b'=') + split_key = key.split(b"=") if len(split_key) == 1: - self.event['keys'].append({ - 'section': section, - 'value': split_key[0].strip().strip(b'"\'"'), - }) + self.event["keys"].append( + { + "section": section, + "value": split_key[0].strip().strip(b'"\'"'), + } + ) elif len(split_key) == 2: - self.event['keys'].append({ - 'section': section, - 'name': split_key[0].strip().strip(b'"\'"'), - 'value': split_key[1].strip().strip(b'"\'"'), - }) + self.event["keys"].append( + { + "section": section, + "name": split_key[0].strip().strip(b'"\'"'), + "value": split_key[1].strip().strip(b'"\'"'), + } + ) diff --git a/src/python/strelka/scanners/scan_iso.py b/src/python/strelka/scanners/scan_iso.py index 77bbb04f..9e825245 100644 --- a/src/python/strelka/scanners/scan_iso.py +++ b/src/python/strelka/scanners/scan_iso.py @@ -1,9 +1,10 @@ -import io import collections import datetime -import pycdlib +import io +import pycdlib from pycdlib.dates import DirectoryRecordDate + from strelka import strelka diff --git a/src/python/strelka/scanners/scan_jar_manifest.py b/src/python/strelka/scanners/scan_jar_manifest.py index ebbeeb75..31d08216 100644 --- a/src/python/strelka/scanners/scan_jar_manifest.py +++ b/src/python/strelka/scanners/scan_jar_manifest.py @@ -5,20 +5,21 @@ class ScanJarManifest(strelka.Scanner): """Collects metadata from JAR manifest files.""" + def scan(self, data, file, options, expire_at): - headers = options.get('headers', []) + headers = options.get("headers", []) - manifest = b'\n'.join(data.splitlines()).rstrip(b'\n') - section_strings = manifest.split(b'\n') + manifest = b"\n".join(data.splitlines()).rstrip(b"\n") + section_strings = 
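A minimal sketch of the `ScanIni` parsing loop above, run against a made-up INI buffer:

```python
data = b'[general]\n; a comment\nname = "strelka"\nflag\n'

section = b""
for line in data.splitlines():
    line = line.strip()
    if not line:
        continue
    if line.startswith(b"[") and line.endswith(b"]"):
        section = line[1:-1]                      # new section header
    elif line.startswith(b"#") or line.startswith(b";"):
        print("comment:", line)
    else:
        parts = line.split(b"=")
        if len(parts) == 2:                       # name = value
            name, value = (p.strip().strip(b"\"'") for p in parts)
            print(section, name, value)
        elif len(parts) == 1:                     # bare value
            print(section, parts[0].strip())
```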
manifest.split(b"\n") - self.event['headers'] = [] + self.event["headers"] = [] for section in section_strings: - s = section.replace(b'\n', b'').split(b':') + s = section.replace(b"\n", b"").split(b":") if len(s) == 2: h, v = s[0].strip(), s[1].strip() - if h not in self.event['headers']: - self.event['headers'].append(h) + if h not in self.event["headers"]: + self.event["headers"].append(h) if headers and h not in headers: continue @@ -28,7 +29,9 @@ def scan(self, data, file, options, expire_at): except (ValueError, SyntaxError): pass - self.event['headers'].append({ - 'header': h, - 'value': v, - }) + self.event["headers"].append( + { + "header": h, + "value": v, + } + ) diff --git a/src/python/strelka/scanners/scan_json.py b/src/python/strelka/scanners/scan_json.py index 4dbd1ea5..62efa347 100644 --- a/src/python/strelka/scanners/scan_json.py +++ b/src/python/strelka/scanners/scan_json.py @@ -5,16 +5,17 @@ class ScanJson(strelka.Scanner): """Collects keys from JSON files.""" + def scan(self, data, file, options, expire_at): - self.event.setdefault('keys', []) + self.event.setdefault("keys", []) try: self._get_keys(self, json.loads(data.decode())) except UnicodeDecodeError: - self.flags.append('unicode_decode_error') + self.flags.append("unicode_decode_error") except json.decoder.JSONDecodeError: - self.flags.append('json_decode_error') + self.flags.append("json_decode_error") @staticmethod def _get_keys(self, variable): @@ -25,8 +26,8 @@ def _get_keys(self, variable): """ if isinstance(variable, dict): for (key, value) in variable.items(): - if key not in self.event['keys']: - self.event['keys'].append(key) + if key not in self.event["keys"]: + self.event["keys"].append(key) self._get_keys(self, value) elif isinstance(variable, list): for v in variable: diff --git a/src/python/strelka/scanners/scan_libarchive.py b/src/python/strelka/scanners/scan_libarchive.py index 14f53222..c361cf4b 100644 --- a/src/python/strelka/scanners/scan_libarchive.py +++ b/src/python/strelka/scanners/scan_libarchive.py @@ -10,10 +10,11 @@ class ScanLibarchive(strelka.Scanner): limit: Maximum number of files to extract. Defaults to 1000. 
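The recursive key walk in `ScanJson` above, as a self-contained sketch:

```python
import json

def get_keys(obj, keys):
    # Collect every dict key at any depth exactly once; descend into lists too.
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key not in keys:
                keys.append(key)
            get_keys(value, keys)
    elif isinstance(obj, list):
        for item in obj:
            get_keys(item, keys)

keys = []
get_keys(json.loads('{"a": {"b": [{"c": 1}]}, "d": 2}'), keys)
print(keys)  # ['a', 'b', 'c', 'd']
```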
""" + def scan(self, data, file, options, expire_at): - file_limit = options.get('limit', 1000) + file_limit = options.get("limit", 1000) - self.event['total'] = {'files': 0, 'extracted': 0} + self.event["total"] = {"files": 0, "extracted": 0} try: with libarchive.memory_reader(data) as archive: @@ -22,23 +23,23 @@ def scan(self, data, file, options, expire_at): # the files before trying to extract them in case an error occurs for entry in archive: if entry.isfile: - self.event['total']['files'] += 1 + self.event["total"]["files"] += 1 with libarchive.memory_reader(data) as archive: for entry in archive: if entry.isfile: - if self.event['total']['extracted'] >= file_limit: + if self.event["total"]["extracted"] >= file_limit: continue - extracted_data = b'' + extracted_data = b"" for block in entry.get_blocks(): extracted_data += block # Send extracted file back to Strelka self.emit_file(extracted_data, name=entry.pathname) - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 except libarchive.ArchiveError: - self.flags.append('libarchive_archive_error') + self.flags.append("libarchive_archive_error") diff --git a/src/python/strelka/scanners/scan_lnk.py b/src/python/strelka/scanners/scan_lnk.py index 999e4f64..7b1d7e1d 100644 --- a/src/python/strelka/scanners/scan_lnk.py +++ b/src/python/strelka/scanners/scan_lnk.py @@ -1,6 +1,14 @@ import uuid -from strelka.cstructs.lnk import ShellLinkHeader, LinkTargetIDList, LinkInfo, CommonNetworkRelativeLink, ExtraData -from construct import Struct, Int16ul, Bytes, StringEncoded, this, IfThenElse + +from construct import Bytes, IfThenElse, Int16ul, StringEncoded, Struct, this +from strelka.cstructs.lnk import ( + CommonNetworkRelativeLink, + ExtraData, + LinkInfo, + LinkTargetIDList, + ShellLinkHeader, +) + from strelka import strelka @@ -24,16 +32,19 @@ def scan(self, data, file, options, expire_at): if header.LinkFlags.HasLinkInfo: linkinfo = LinkInfo.parse(data[offset:]) if linkinfo.VolumeID.DriveType: - self.event['drive_type'] = linkinfo.VolumeID.DriveType + self.event["drive_type"] = linkinfo.VolumeID.DriveType if linkinfo.VolumeID.DriveSerialNumber: - self.event["drive_serial_number"] = '{0:x}'.format(linkinfo.VolumeID.DriveSerialNumber) + self.event["drive_serial_number"] = "{0:x}".format( + linkinfo.VolumeID.DriveSerialNumber + ) if linkinfo.VolumeID.Data: self.event["volume_label"] = linkinfo.VolumeID.Data if linkinfo.LocalBasePath: self.event["local_base_path"] = linkinfo.LocalBasePath if linkinfo.CommonNetworkRelativeLink: commonnetworkrelativelink = CommonNetworkRelativeLink.parse( - data[offset + linkinfo.CommonNetworkRelativeLinkOffset:]) + data[offset + linkinfo.CommonNetworkRelativeLinkOffset :] + ) self.event["net_name"] = commonnetworkrelativelink.NetName offset += linkinfo.LinkInfoSize except strelka.ScannerTimeout: @@ -43,8 +54,12 @@ def scan(self, data, file, options, expire_at): StringData = "StringData" / Struct( "CountCharacters" / Int16ul, - "String" / IfThenElse(header.LinkFlags.IsUnicode, StringEncoded(Bytes(this.CountCharacters * 2), "utf16"), - StringEncoded(Bytes(this.CountCharacters), "utf8")) + "String" + / IfThenElse( + header.LinkFlags.IsUnicode, + StringEncoded(Bytes(this.CountCharacters * 2), "utf16"), + StringEncoded(Bytes(this.CountCharacters), "utf8"), + ), ) try: @@ -52,9 +67,9 @@ def scan(self, data, file, options, expire_at): NAME_STRING = StringData.parse(data[offset:]) self.event["name_string"] = NAME_STRING.String if header.LinkFlags.IsUnicode: - offset += 
(len(NAME_STRING.String) * 2 + 2) + offset += len(NAME_STRING.String) * 2 + 2 else: - offset += (len(NAME_STRING.String) + 2) + offset += len(NAME_STRING.String) + 2 except strelka.ScannerTimeout: raise except Exception: @@ -65,9 +80,9 @@ def scan(self, data, file, options, expire_at): RELATIVE_PATH = StringData.parse(data[offset:]) self.event["relative_path"] = RELATIVE_PATH.String if header.LinkFlags.IsUnicode: - offset += (len(RELATIVE_PATH.String) * 2 + 2) + offset += len(RELATIVE_PATH.String) * 2 + 2 else: - offset += (len(RELATIVE_PATH.String) + 2) + offset += len(RELATIVE_PATH.String) + 2 except strelka.ScannerTimeout: raise except Exception: @@ -78,9 +93,9 @@ def scan(self, data, file, options, expire_at): WORKING_DIR = StringData.parse(data[offset:]) self.event["working_dir"] = WORKING_DIR.String if header.LinkFlags.IsUnicode: - offset += (len(WORKING_DIR.String) * 2 + 2) + offset += len(WORKING_DIR.String) * 2 + 2 else: - offset += (len(WORKING_DIR.String) + 2) + offset += len(WORKING_DIR.String) + 2 except strelka.ScannerTimeout: raise except Exception: @@ -91,9 +106,9 @@ def scan(self, data, file, options, expire_at): COMMAND_LINE_ARGUMENTS = StringData.parse(data[offset:]) self.event["command_line_args"] = COMMAND_LINE_ARGUMENTS.String if header.LinkFlags.IsUnicode: - offset += (len(COMMAND_LINE_ARGUMENTS.String) * 2 + 2) + offset += len(COMMAND_LINE_ARGUMENTS.String) * 2 + 2 else: - offset += (len(COMMAND_LINE_ARGUMENTS.String) + 2) + offset += len(COMMAND_LINE_ARGUMENTS.String) + 2 except strelka.ScannerTimeout: raise except Exception: @@ -104,9 +119,9 @@ def scan(self, data, file, options, expire_at): ICON_LOCATION = StringData.parse(data[offset:]) self.event["icon_location"] = ICON_LOCATION.String if header.LinkFlags.IsUnicode: - offset += (len(ICON_LOCATION.String) * 2 + 2) + offset += len(ICON_LOCATION.String) * 2 + 2 else: - offset += (len(ICON_LOCATION.String) + 2) + offset += len(ICON_LOCATION.String) + 2 except strelka.ScannerTimeout: raise except Exception: @@ -125,15 +140,21 @@ def scan(self, data, file, options, expire_at): try: if extradata.IconEnvironmentDataBlock: - self.event["icon_target"] = extradata.IconEnvironmentDataBlock.TargetAnsi + self.event[ + "icon_target" + ] = extradata.IconEnvironmentDataBlock.TargetAnsi except strelka.ScannerTimeout: raise except Exception: self.flags.append("Unable to parse IconEnvironmentDataBlock") if extradata.TrackerDataBlock: - self.event["machine_id"] = extradata.TrackerDataBlock.MachineID.strip(b'\x00') - self.event["mac"] = str(uuid.UUID(bytes_le=extradata.TrackerDataBlock.Droid[16:])).split('-')[-1] + self.event[ + "machine_id" + ] = extradata.TrackerDataBlock.MachineID.strip(b"\x00") + self.event["mac"] = str( + uuid.UUID(bytes_le=extradata.TrackerDataBlock.Droid[16:]) + ).split("-")[-1] offset += extradata.BlockSize diff --git a/src/python/strelka/scanners/scan_lsb.py b/src/python/strelka/scanners/scan_lsb.py index 6939f970..41cae34b 100644 --- a/src/python/strelka/scanners/scan_lsb.py +++ b/src/python/strelka/scanners/scan_lsb.py @@ -7,25 +7,16 @@ class ScanLsb(strelka.Scanner): """This scanner checks if there is any hidden strings at the end of each RGB value""" - def scan(self,data,file,options, expire_at): + def scan(self, data, file, options, expire_at): try: - ans=False image = np.fromstring(data, np.uint8) image = cv2.imdecode(image, cv2.IMREAD_COLOR) bits = self._get_bits(image) bytes_ = self._get_bytes(bits) chars = [] chars.append(self._convert_bytes_to_text(bytes_)) - flag=(''.join(chars).encode('ascii', 
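The five StringData reads in `ScanLnk` repeat the same advance arithmetic: 2 bytes of `CountCharacters` plus one byte per character, or two when `IsUnicode` is set. A hypothetical helper (not in the diff) that would collapse the repetition:

```python
def read_string_data(string_data_struct, data, offset, is_unicode):
    # Parse one StringData at `offset` and return (string, next_offset).
    parsed = string_data_struct.parse(data[offset:])
    width = 2 if is_unicode else 1
    return parsed.String, offset + len(parsed.String) * width + 2
```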
'ignore')) - if (len(flag)>1): - ans=True - self.event['lsb'] = ans - #print("This Image might have something stored in") - else: - extract_file = strelka.File ( - source = self.name - ) - self.event['lsb'] = ans + flag = "".join(chars).encode("ascii", "ignore") + self.event["lsb"] = len(flag) > 1 except AttributeError: self.flags.append("bits_image_error") except cv2.error: @@ -33,13 +24,12 @@ def scan(self,data,file,options, expire_at): def _get_bits(self, img): h, w, t = img.shape - bits = '' + bits = "" for x in range(0, h): for y in range(0, w): - l=img[x,y] - length=len(l) - for k in l: + lst = img[x, y] + for k in lst: bits += bin(k)[-1] return bits @@ -52,6 +42,6 @@ def _convert_bytes_to_text(self, bytes_): def _get_bytes(self, bits): bytes_ = [] for i in range(int(len(bits) / 8)): - bytes_.append(bits[i * 8:(i + 1) * 8]) - #print(bytes_) - return bytes_ \ No newline at end of file + bytes_.append(bits[i * 8 : (i + 1) * 8]) + # print(bytes_) + return bytes_ diff --git a/src/python/strelka/scanners/scan_lzma.py b/src/python/strelka/scanners/scan_lzma.py index 60940419..11b85199 100644 --- a/src/python/strelka/scanners/scan_lzma.py +++ b/src/python/strelka/scanners/scan_lzma.py @@ -6,19 +6,20 @@ class ScanLzma(strelka.Scanner): """Decompresses LZMA files.""" + def scan(self, data, file, options, expire_at): try: with io.BytesIO(data) as lzma_io: with lzma.LZMAFile(filename=lzma_io) as lzma_obj: try: decompressed = lzma_obj.read() - self.event['size'] = len(decompressed) + self.event["size"] = len(decompressed) # Send extracted file back to Strelka self.emit_file(decompressed, name=file.name) except EOFError: - self.flags.append('eof_error') + self.flags.append("eof_error") except lzma.LZMAError: - self.flags.append('lzma_error') + self.flags.append("lzma_error") diff --git a/src/python/strelka/scanners/scan_macho.py b/src/python/strelka/scanners/scan_macho.py index c429b1bf..5c5a5d70 100644 --- a/src/python/strelka/scanners/scan_macho.py +++ b/src/python/strelka/scanners/scan_macho.py @@ -5,195 +5,186 @@ from strelka import strelka CPU_SUBTYPES = { - 'ANY': { - -2: 'ANY', - -1: 'MULTIPLE', - 0: 'LITTLE_ENDIAN', - 1: 'BIG_ENDIAN' + "ANY": {-2: "ANY", -1: "MULTIPLE", 0: "LITTLE_ENDIAN", 1: "BIG_ENDIAN"}, + "x86": { + -2: "x86 (I386)", + -1: "MULITPLE", + 0: "INTEL_MODEL_ALL", + 3: "x86_ALL, x86_64_ALL, I386_ALL, or 386", + 4: "x86_ARCH1 or 486", + 5: "586 or PENT", + 8: "x86_64_H or PENTIUM_3", + 9: "PENTIUM_M", + 10: "PENTIUM_4", + 11: "ITANIUM", + 12: "XEON", + 15: "INTEL_FAMILY_MAX", + 22: "PENTPRO", + 24: "PENTIUM_3_M", + 26: "PENTIUM_4_M", + 27: "ITANIUM_2", + 28: "XEON_MP", + 40: "PENTIUM_3_XEON", + 54: "PENTII_M3", + 86: "PENTII_M5", + 103: "CELERON", + 119: "CELERON_MOBILE", + 132: "486SX", }, - 'x86': { - -2: 'x86 (I386)', - -1: 'MULITPLE', - 0: 'INTEL_MODEL_ALL', - 3: 'x86_ALL, x86_64_ALL, I386_ALL, or 386', - 4: 'x86_ARCH1 or 486', - 5: '586 or PENT', - 8: 'x86_64_H or PENTIUM_3', - 9: 'PENTIUM_M', - 10: 'PENTIUM_4', - 11: 'ITANIUM', - 12: 'XEON', - 15: 'INTEL_FAMILY_MAX', - 22: 'PENTPRO', - 24: 'PENTIUM_3_M', - 26: 'PENTIUM_4_M', - 27: 'ITANIUM_2', - 28: 'XEON_MP', - 40: 'PENTIUM_3_XEON', - 54: 'PENTII_M3', - 86: 'PENTII_M5', - 103: 'CELERON', - 119: 'CELERON_MOBILE', - 132: '486SX' + "MC98000": {-2: "MC98000", -1: "MULTIPLE", 0: "MC98000_ALL", 1: "MC98601"}, + "ARM": { + -2: "ARM", + -1: "MULTIPLE", + 0: "ARM_ALL", + 1: "ARM_A500_ARCH", + 2: "ARM_A500", + 3: "ARM_A440", + 4: "ARM_M4", + 5: "ARM_V4T", + 6: "ARM_V6", + 7: "ARM_V5TEJ", + 8: "ARM_XSCALE", + 9: "ARM_V7", + 10: 
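How the `ScanLsb` check works, as a numpy-only sketch: the lowest bit of every channel value is concatenated, packed into bytes, and non-ASCII characters are dropped; anything left signals possible hidden data. (The unchanged context line still uses the deprecated `np.fromstring`; `np.frombuffer` is the modern replacement, though this diff does not touch it.)

```python
import numpy as np

img = np.random.randint(0, 256, size=(4, 4, 3), dtype=np.uint8)  # stand-in image

bits = "".join(str(v & 1) for v in img.flatten())                # LSB of each value
bytes_ = [bits[i * 8 : (i + 1) * 8] for i in range(len(bits) // 8)]
text = "".join(chr(int(b, 2)) for b in bytes_)
hidden = text.encode("ascii", "ignore")
print(len(hidden) > 1)  # the scanner's boolean "lsb" verdict
```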
"ARM_V7F", + 11: "ARM_V7S", + 12: "ARM_V7K", + 13: "ARM_V8", + 14: "ARM_V6M", + 15: "ARM_V7M", + 16: "ARM_V7EM", }, - 'MC98000': { - -2: 'MC98000', - -1: 'MULTIPLE', - 0: 'MC98000_ALL', - 1: 'MC98601' + "SPARC": { + -2: "SPARC", + -1: "MULTIPLE", + 0: "SPARC_ALL or SUN4_ALL", + 1: "SUN4_260", + 2: "SUN4_110", }, - 'ARM': { - -2: 'ARM', - -1: 'MULTIPLE', - 0: 'ARM_ALL', - 1: 'ARM_A500_ARCH', - 2: 'ARM_A500', - 3: 'ARM_A440', - 4: 'ARM_M4', - 5: 'ARM_V4T', - 6: 'ARM_V6', - 7: 'ARM_V5TEJ', - 8: 'ARM_XSCALE', - 9: 'ARM_V7', - 10: 'ARM_V7F', - 11: 'ARM_V7S', - 12: 'ARM_V7K', - 13: 'ARM_V8', - 14: 'ARM_V6M', - 15: 'ARM_V7M', - 16: 'ARM_V7EM' + "POWERPC": { + -2: "POWERPC", + -1: "MULTIPLE", + 0: "POWERPC_ALL", + 1: "POWERPC_601", + 2: "POWERPC_602", + 3: "POWERPC_603", + 4: "POWERPC_603e", + 5: "POWERPC_603ev", + 6: "POWERPC_604", + 7: "POWERPC_604e", + 8: "POWERPC_620", + 9: "POWERPC_750", + 10: "POWERPC_7400", + 11: "POWERPC_7450", + 100: "POWERPC_970", }, - 'SPARC': { - -2: 'SPARC', - -1: 'MULTIPLE', - 0: 'SPARC_ALL or SUN4_ALL', - 1: 'SUN4_260', - 2: 'SUN4_110' + "x86_64": { + -2: "x86_64", + -1: "MULTIPLE", + 0: "INTEL_MODEL_ALL", + 3: "x86_ALL, x86_64_ALL, I386_ALL, or 386", + 4: "x86_ARCH1 or 486", + 5: "586 or PENT", + 8: "x86_64_H or PENTIUM_3", + 9: "PENTIUM_M", + 10: "PENTIUM_4", + 11: "ITANIUM", + 12: "XEON", + 15: "INTEL_FAMILY_MAX", + 22: "PENTPRO", + 24: "PENTIUM_3_M", + 26: "PENTIUM_4_M", + 27: "ITANIUM_2", + 28: "XEON_MP", + 40: "PENTIUM_3_XEON", + 54: "PENTII_M3", + 86: "PENTII_M5", + 103: "CELERON", + 119: "CELERON_MOBILE", + 132: "486SX", + 2147483648 + 0: "INTEL_MODEL_ALL", + 2147483648 + 3: "x86_ALL, x86_64_ALL, I386_ALL, or 386", + 2147483648 + 4: "x86_ARCH1 or 486", + 2147483648 + 5: "586 or PENT", + 2147483648 + 8: "x86_64_H or PENTIUM_3", + 2147483648 + 9: "PENTIUM_M", + 2147483648 + 10: "PENTIUM_4", + 2147483648 + 11: "ITANIUM", + 2147483648 + 12: "XEON", + 2147483648 + 15: "INTEL_FAMILY_MAX", + 2147483648 + 22: "PENTPRO", + 2147483648 + 24: "PENTIUM_3_M", + 2147483648 + 26: "PENTIUM_4_M", + 2147483648 + 27: "ITANIUM_2", + 2147483648 + 28: "XEON_MP", + 2147483648 + 40: "PENTIUM_3_XEON", + 2147483648 + 54: "PENTII_M3", + 2147483648 + 86: "PENTII_M5", + 2147483648 + 103: "CELERON", + 2147483648 + 119: "CELERON_MOBILE", + 2147483648 + 132: "486SX", }, - 'POWERPC': { - -2: 'POWERPC', - -1: 'MULTIPLE', - 0: 'POWERPC_ALL', - 1: 'POWERPC_601', - 2: 'POWERPC_602', - 3: 'POWERPC_603', - 4: 'POWERPC_603e', - 5: 'POWERPC_603ev', - 6: 'POWERPC_604', - 7: 'POWERPC_604e', - 8: 'POWERPC_620', - 9: 'POWERPC_750', - 10: 'POWERPC_7400', - 11: 'POWERPC_7450', - 100: 'POWERPC_970' + "ARM64": { + -2: "ARM64", + -1: "MULTIPLE", + 0: "ARM64_ALL", + 1: "ARM64_V8", + 2147483648 + 0: "ARM64_ALL", + 2147483648 + 1: "ARM64_V8", }, - 'x86_64': { - -2: 'x86_64', - -1: 'MULTIPLE', - 0: 'INTEL_MODEL_ALL', - 3: 'x86_ALL, x86_64_ALL, I386_ALL, or 386', - 4: 'x86_ARCH1 or 486', - 5: '586 or PENT', - 8: 'x86_64_H or PENTIUM_3', - 9: 'PENTIUM_M', - 10: 'PENTIUM_4', - 11: 'ITANIUM', - 12: 'XEON', - 15: 'INTEL_FAMILY_MAX', - 22: 'PENTPRO', - 24: 'PENTIUM_3_M', - 26: 'PENTIUM_4_M', - 27: 'ITANIUM_2', - 28: 'XEON_MP', - 40: 'PENTIUM_3_XEON', - 54: 'PENTII_M3', - 86: 'PENTII_M5', - 103: 'CELERON', - 119: 'CELERON_MOBILE', - 132: '486SX', - 2147483648 + 0: 'INTEL_MODEL_ALL', - 2147483648 + 3: 'x86_ALL, x86_64_ALL, I386_ALL, or 386', - 2147483648 + 4: 'x86_ARCH1 or 486', - 2147483648 + 5: '586 or PENT', - 2147483648 + 8: 'x86_64_H or PENTIUM_3', - 2147483648 + 9: 'PENTIUM_M', - 2147483648 + 10: 'PENTIUM_4', - 
2147483648 + 11: 'ITANIUM', - 2147483648 + 12: 'XEON', - 2147483648 + 15: 'INTEL_FAMILY_MAX', - 2147483648 + 22: 'PENTPRO', - 2147483648 + 24: 'PENTIUM_3_M', - 2147483648 + 26: 'PENTIUM_4_M', - 2147483648 + 27: 'ITANIUM_2', - 2147483648 + 28: 'XEON_MP', - 2147483648 + 40: 'PENTIUM_3_XEON', - 2147483648 + 54: 'PENTII_M3', - 2147483648 + 86: 'PENTII_M5', - 2147483648 + 103: 'CELERON', - 2147483648 + 119: 'CELERON_MOBILE', - 2147483648 + 132: '486SX' + "POWERPC64": { + -2: "POWERPC64", + -1: "MULTIPLE", + 0: "POWERPC_ALL", + 1: "POWERPC_601", + 2: "POWERPC_602", + 3: "POWERPC_603", + 4: "POWERPC_603e", + 5: "POWERPC_603ev", + 6: "POWERPC_604", + 7: "POWERPC_604e", + 8: "POWERPC_620", + 9: "POWERPC_750", + 10: "POWERPC_7400", + 11: "POWERPC_7450", + 100: "POWERPC_970", + 2147483648 + 0: "POWERPC_ALL (LIB64)", + 2147483648 + 1: "POWERPC_601 (LIB64)", + 2147483648 + 2: "POWERPC_602 (LIB64)", + 2147483648 + 3: "POWERPC_603 (LIB64)", + 2147483648 + 4: "POWERPC_603e (LIB64)", + 2147483648 + 5: "POWERPC_603ev (LIB64)", + 2147483648 + 6: "POWERPC_604 (LIB64)", + 2147483648 + 7: "POWERPC_604e (LIB64)", + 2147483648 + 8: "POWERPC_620 (LIB64)", + 2147483648 + 9: "POWERPC_750 (LIB64)", + 2147483648 + 10: "POWERPC_7400 (LIB64)", + 2147483648 + 11: "POWERPC_7450 (LIB64)", + 2147483648 + 100: "POWERPC_970 (LIB64)", }, - 'ARM64': { - -2: 'ARM64', - -1: 'MULTIPLE', - 0: 'ARM64_ALL', - 1: 'ARM64_V8', - 2147483648 + 0: 'ARM64_ALL', - 2147483648 + 1: 'ARM64_V8' - }, - 'POWERPC64': { - -2: 'POWERPC64', - -1: 'MULTIPLE', - 0: 'POWERPC_ALL', - 1: 'POWERPC_601', - 2: 'POWERPC_602', - 3: 'POWERPC_603', - 4: 'POWERPC_603e', - 5: 'POWERPC_603ev', - 6: 'POWERPC_604', - 7: 'POWERPC_604e', - 8: 'POWERPC_620', - 9: 'POWERPC_750', - 10: 'POWERPC_7400', - 11: 'POWERPC_7450', - 100: 'POWERPC_970', - 2147483648 + 0: 'POWERPC_ALL (LIB64)', - 2147483648 + 1: 'POWERPC_601 (LIB64)', - 2147483648 + 2: 'POWERPC_602 (LIB64)', - 2147483648 + 3: 'POWERPC_603 (LIB64)', - 2147483648 + 4: 'POWERPC_603e (LIB64)', - 2147483648 + 5: 'POWERPC_603ev (LIB64)', - 2147483648 + 6: 'POWERPC_604 (LIB64)', - 2147483648 + 7: 'POWERPC_604e (LIB64)', - 2147483648 + 8: 'POWERPC_620 (LIB64)', - 2147483648 + 9: 'POWERPC_750 (LIB64)', - 2147483648 + 10: 'POWERPC_7400 (LIB64)', - 2147483648 + 11: 'POWERPC_7450 (LIB64)', - 2147483648 + 100: 'POWERPC_970 (LIB64)' - } } PROTECTIONS = { - 0: '---', - 1: 'r--', - 2: '-w-', - 3: 'rw-', - 4: '--x', - 5: 'r-x', - 6: '-wx', - 7: 'rwx', + 0: "---", + 1: "r--", + 2: "-w-", + 3: "rw-", + 4: "--x", + 5: "r-x", + 6: "-wx", + 7: "rwx", } class ScanMacho(strelka.Scanner): """Collects metadata from Mach-O files.""" + def scan(self, data, file, options, expire_at): - tmp_directory = options.get('tmp_directory', '/tmp/') + tmp_directory = options.get("tmp_directory", "/tmp/") macho = MachO.parse(raw=list(data), config=MachO.ParserConfig.deep) - self.event['total'] = { - 'binaries': macho.size, + self.event["total"] = { + "binaries": macho.size, } if macho.size > 1: @@ -203,356 +194,366 @@ def scan(self, data, file, options, expire_at): b.write(tmp_data.name) tmp_data.flush() - with open(tmp_data.name, 'rb') as f: + with open(tmp_data.name, "rb") as f: # Send extracted file back to Strelka - self.emit_file(f.read(), name=f'binary_{r}') + self.emit_file(f.read(), name=f"binary_{r}") return binary = macho.at(0) - self.event['total'] = { - **self.event['total'], - 'commands': binary.header.nb_cmds, - 'libraries': len(binary.libraries), - 'relocations': len(binary.relocations), - 'sections': len(binary.sections), - 'segments': 
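A note on the tables above: the `2147483648 + n` keys encode Mach-O's 0x80000000 capability bit (e.g. `CPU_SUBTYPE_LIB64`), which may be OR'd into `cpu_subtype`, and `PROTECTIONS` is just the 3-bit VM protection value rendered as an rwx string. A small sketch:

```python
CPU_SUBTYPE_MASK = 0x80000000

def base_subtype(subtype: int) -> int:
    return subtype & ~CPU_SUBTYPE_MASK    # strip the capability bit

def protection_string(prot: int) -> str:
    # Same mapping as the PROTECTIONS table, derived from the r/w/x bits.
    return "".join(f if prot & bit else "-" for bit, f in ((1, "r"), (2, "w"), (4, "x")))

assert base_subtype(2147483648 + 1) == 1
assert protection_string(5) == "r-x"      # matches PROTECTIONS[5]
```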
len(binary.segments), - 'symbols': len(binary.symbols), + self.event["total"] = { + **self.event["total"], + "commands": binary.header.nb_cmds, + "libraries": len(binary.libraries), + "relocations": len(binary.relocations), + "sections": len(binary.sections), + "segments": len(binary.segments), + "symbols": len(binary.symbols), } - self.event['nx'] = binary.has_nx - self.event['pie'] = binary.is_pie + self.event["nx"] = binary.has_nx + self.event["pie"] = binary.is_pie - cpu_type = str(binary.header.cpu_type).split('.')[1] - if cpu_type != '???': + cpu_type = str(binary.header.cpu_type).split(".")[1] + if cpu_type != "???": cpu_subtype = CPU_SUBTYPES[cpu_type][binary.header.cpu_subtype] else: cpu_subtype = str(binary.header.cpu_subtype) - self.event['header'] = { - 'cpu': { - 'primary': cpu_type, - 'sub': cpu_subtype, + self.event["header"] = { + "cpu": { + "primary": cpu_type, + "sub": cpu_subtype, }, - 'file': str(binary.header.file_type).split('.')[1], - 'flags': [str(flag).split('.')[1] for flag in binary.header.flags_list], + "file": str(binary.header.file_type).split(".")[1], + "flags": [str(flag).split(".")[1] for flag in binary.header.flags_list], } - self.event['relocations'] = [] + self.event["relocations"] = [] for relo in binary.relocations: row = { - 'address': relo.address, - 'size': relo.size, + "address": relo.address, + "size": relo.size, } if relo.has_section: - row['section'] = relo.section.name + row["section"] = relo.section.name if relo.has_segment: - row['segment'] = relo.segment.name + row["segment"] = relo.segment.name if relo.has_symbol: - row['symbol'] = relo.symbol.name + row["symbol"] = relo.symbol.name - self.event['relocations'].append(row) + self.event["relocations"].append(row) - self.event['sections'] = [] + self.event["sections"] = [] for sec in binary.sections: - self.event['sections'].append({ - 'alignment': sec.alignment, - 'entropy': sec.entropy, - 'name': sec.name, - 'offset': sec.offset, - 'size': sec.size, - 'virtual': { - 'address': sec.virtual_address, - }, - }) + self.event["sections"].append( + { + "alignment": sec.alignment, + "entropy": sec.entropy, + "name": sec.name, + "offset": sec.offset, + "size": sec.size, + "virtual": { + "address": sec.virtual_address, + }, + } + ) - self.event['segments'] = [] + self.event["segments"] = [] for seg in binary.segments: - self.event['segments'].append({ - 'command': { - 'offset': seg.command_offset, - 'size': seg.size, - 'type': str(seg.command).split('.')[1], - }, - 'file': { - 'offset': seg.file_offset, - 'size': seg.file_size, - }, - 'flags': seg.flags, - 'protection': { - 'init': PROTECTIONS[seg.init_protection], - 'max': PROTECTIONS[seg.max_protection], - }, - 'name': seg.name, - 'sections': [sec.name for sec in seg.sections], - 'virtual': { - 'address': seg.virtual_address, - 'size': seg.virtual_size, - }, - }) + self.event["segments"].append( + { + "command": { + "offset": seg.command_offset, + "size": seg.size, + "type": str(seg.command).split(".")[1], + }, + "file": { + "offset": seg.file_offset, + "size": seg.file_size, + }, + "flags": seg.flags, + "protection": { + "init": PROTECTIONS[seg.init_protection], + "max": PROTECTIONS[seg.max_protection], + }, + "name": seg.name, + "sections": [sec.name for sec in seg.sections], + "virtual": { + "address": seg.virtual_address, + "size": seg.virtual_size, + }, + } + ) - self.event['symbols'] = { - 'exported': [sym.name for sym in binary.exported_symbols], - 'imported': [sym.name for sym in binary.imported_symbols], - 'libraries': [lib.name for lib in 
binary.libraries], - 'table': [], + self.event["symbols"] = { + "exported": [sym.name for sym in binary.exported_symbols], + "imported": [sym.name for sym in binary.imported_symbols], + "libraries": [lib.name for lib in binary.libraries], + "table": [], } for sym in binary.symbols: row = { - 'symbol': sym.name, - 'origin': str(sym.origin).rsplit('.')[1], + "symbol": sym.name, + "origin": str(sym.origin).rsplit(".")[1], } if sym.has_binding_info: - row['binding'] = { - 'address': sym.binding_info.address, - 'class': str(sym.binding_info.binding_class).rsplit('.')[1], - 'type': str(sym.binding_info.binding_type).rsplit('.')[1], - 'weak_import': sym.binding_info.weak_import, + row["binding"] = { + "address": sym.binding_info.address, + "class": str(sym.binding_info.binding_class).rsplit(".")[1], + "type": str(sym.binding_info.binding_type).rsplit(".")[1], + "weak_import": sym.binding_info.weak_import, } if sym.binding_info.has_library: lib = sym.binding_info.library - row['binding']['library'] = { - 'name': lib.name, - 'size': lib.size, - 'timestamp': lib.timestamp, - 'version': { - 'compatibility': '.'.join([str(ver) for ver in lib.compatibility_version]), - 'current': '.'.join([str(ver) for ver in lib.current_version]), + row["binding"]["library"] = { + "name": lib.name, + "size": lib.size, + "timestamp": lib.timestamp, + "version": { + "compatibility": ".".join( + [str(ver) for ver in lib.compatibility_version] + ), + "current": ".".join( + [str(ver) for ver in lib.current_version] + ), }, } if sym.binding_info.has_segment: - row['binding']['segment'] = sym.binding_info.segment.name + row["binding"]["segment"] = sym.binding_info.segment.name elif sym.has_export_info: - row['export'] = { - 'address': sym.export_info.address, - 'flags': sym.export_info.flags, + row["export"] = { + "address": sym.export_info.address, + "flags": sym.export_info.flags, } - self.event['symbols']['table'].append(row) + self.event["symbols"]["table"].append(row) - self.event['commands'] = { - 'commands': [str(com.command).split('.')[1] for com in binary.commands] + self.event["commands"] = { + "commands": [str(com.command).split(".")[1] for com in binary.commands] } if binary.has_code_signature: - self.event['commands']['code_signature'] = { - 'command': { - 'offset': binary.code_signature.command_offset, - 'size': binary.code_signature.size, + self.event["commands"]["code_signature"] = { + "command": { + "offset": binary.code_signature.command_offset, + "size": binary.code_signature.size, }, - 'data': { - 'offset': binary.code_signature.data_offset, - 'size': binary.code_signature.data_size, + "data": { + "offset": binary.code_signature.data_offset, + "size": binary.code_signature.data_size, }, } if binary.has_data_in_code: - self.event['commands']['data_in_code'] = { - 'command': { - 'offset': binary.data_in_code.command_offset, - 'size': binary.data_in_code.size, + self.event["commands"]["data_in_code"] = { + "command": { + "offset": binary.data_in_code.command_offset, + "size": binary.data_in_code.size, }, - 'data': { - 'offset': binary.data_in_code.data_offset, - 'size': binary.data_in_code.data_size, + "data": { + "offset": binary.data_in_code.data_offset, + "size": binary.data_in_code.data_size, }, } entries = [] for e in binary.data_in_code.entries: - entries.append({ - 'length': e.length, - 'offset': e.offset, - 'type': str(e.type).split('.')[1] - }) - self.event['commands']['data_in_code']['entries'] = entries + entries.append( + { + "length": e.length, + "offset": e.offset, + "type": 
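The repeated `str(value).split(".")[1]` (and `rsplit`) pattern above relies on enums stringifying as `ClassName.MEMBER`; a stand-in `enum` shows the idea without depending on LIEF:

```python
import enum

class FileType(enum.Enum):   # hypothetical stand-in for a LIEF enum
    EXECUTE = 2

print(str(FileType.EXECUTE))                # FileType.EXECUTE
print(str(FileType.EXECUTE).split(".")[1])  # EXECUTE
```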
str(e.type).split(".")[1], + } + ) + self.event["commands"]["data_in_code"]["entries"] = entries if binary.has_dyld_environment: - self.event['commands']['dyld_environment'] = { - 'command': { - 'offset': binary.dyld_environment.command_offset, - 'size': binary.dyld_environment.size, + self.event["commands"]["dyld_environment"] = { + "command": { + "offset": binary.dyld_environment.command_offset, + "size": binary.dyld_environment.size, }, - 'environment_variable': binary.dyld_environment.value, + "environment_variable": binary.dyld_environment.value, } if binary.has_dyld_info: - self.event['commands']['dyld_info'] = { - 'bind': { - 'offset': binary.dyld_info.bind[0], - 'size': binary.dyld_info.bind[1], - 'lazy': { - 'offset': binary.dyld_info.lazy_bind[0], - 'size': binary.dyld_info.lazy_bind[1], + self.event["commands"]["dyld_info"] = { + "bind": { + "offset": binary.dyld_info.bind[0], + "size": binary.dyld_info.bind[1], + "lazy": { + "offset": binary.dyld_info.lazy_bind[0], + "size": binary.dyld_info.lazy_bind[1], }, - 'weak': { - 'offset': binary.dyld_info.weak_bind[0], - 'size': binary.dyld_info.weak_bind[1], + "weak": { + "offset": binary.dyld_info.weak_bind[0], + "size": binary.dyld_info.weak_bind[1], }, }, - 'command': { - 'offset': binary.dyld_info.command_offset, - 'size': binary.dyld_info.size, + "command": { + "offset": binary.dyld_info.command_offset, + "size": binary.dyld_info.size, }, - 'export': { - 'offset': binary.dyld_info.export_info[0], - 'size': binary.dyld_info.export_info[1], + "export": { + "offset": binary.dyld_info.export_info[0], + "size": binary.dyld_info.export_info[1], }, - 'rebase': { - 'offset': binary.dyld_info.rebase[0], - 'size': binary.dyld_info.rebase[1], + "rebase": { + "offset": binary.dyld_info.rebase[0], + "size": binary.dyld_info.rebase[1], }, } if binary.has_dylinker: - self.event['commands']['load_dylinker'] = { - 'command': { - 'offset': binary.dylinker.command_offset, - 'size': binary.dylinker.size, + self.event["commands"]["load_dylinker"] = { + "command": { + "offset": binary.dylinker.command_offset, + "size": binary.dylinker.size, }, - 'name': binary.dylinker.name, + "name": binary.dylinker.name, } if binary.has_dynamic_symbol_command: - self.event['commands']['dynamic_symbol'] = { - 'command': { - 'offset': binary.dynamic_symbol_command.command_offset, - 'size': binary.dynamic_symbol_command.size, - }, - 'offset': { - 'symbol': { - 'external': binary.dynamic_symbol_command.external_reference_symbol_offset, - 'indirect': binary.dynamic_symbol_command.indirect_symbol_offset, + self.event["commands"]["dynamic_symbol"] = { + "command": { + "offset": binary.dynamic_symbol_command.command_offset, + "size": binary.dynamic_symbol_command.size, + }, + "offset": { + "symbol": { + "external": binary.dynamic_symbol_command.external_reference_symbol_offset, + "indirect": binary.dynamic_symbol_command.indirect_symbol_offset, }, - 'relocation': { - 'external': binary.dynamic_symbol_command.external_relocation_offset, - 'local': binary.dynamic_symbol_command.local_relocation_offset, + "relocation": { + "external": binary.dynamic_symbol_command.external_relocation_offset, + "local": binary.dynamic_symbol_command.local_relocation_offset, }, - 'table': { - 'module': binary.dynamic_symbol_command.module_table_offset, + "table": { + "module": binary.dynamic_symbol_command.module_table_offset, }, - 'toc': binary.dynamic_symbol_command.toc_offset, + "toc": binary.dynamic_symbol_command.toc_offset, }, } if binary.has_encryption_info: - 
self.event['commands']['encryption_info'] = { - 'command': { - 'offset': binary.encryption_info.command_offset, - 'size': binary.encryption_info.size, + self.event["commands"]["encryption_info"] = { + "command": { + "offset": binary.encryption_info.command_offset, + "size": binary.encryption_info.size, }, - 'crypt': { - 'id': binary.encryption_info.crypt_id, - 'offset': binary.encryption_info.crypt_offset, - 'size': binary.encryption_info.crypt_size, + "crypt": { + "id": binary.encryption_info.crypt_id, + "offset": binary.encryption_info.crypt_offset, + "size": binary.encryption_info.crypt_size, }, } if binary.has_function_starts: - self.event['commands']['function_starts'] = { - 'command': { - 'offset': binary.function_starts.command_offset, - 'size': binary.function_starts.size, + self.event["commands"]["function_starts"] = { + "command": { + "offset": binary.function_starts.command_offset, + "size": binary.function_starts.size, }, - 'data': { - 'offset': binary.function_starts.data_offset, - 'size': binary.function_starts.data_size, + "data": { + "offset": binary.function_starts.data_offset, + "size": binary.function_starts.data_size, }, } if binary.has_main_command: - self.event['commands']['main'] = { - 'command': { - 'offset': binary.main_command.command_offset, - 'size': binary.main_command.size, + self.event["commands"]["main"] = { + "command": { + "offset": binary.main_command.command_offset, + "size": binary.main_command.size, }, - 'entry_point': binary.main_command.entrypoint, - 'stack_size': binary.main_command.stack_size, + "entry_point": binary.main_command.entrypoint, + "stack_size": binary.main_command.stack_size, } if binary.has_rpath: - self.event['commands']['rpath'] = { - 'command': { - 'offset': binary.rpath.command_offset, - 'size': binary.rpath.size, + self.event["commands"]["rpath"] = { + "command": { + "offset": binary.rpath.command_offset, + "size": binary.rpath.size, }, - 'path': binary.rpath.path, + "path": binary.rpath.path, } if binary.has_segment_split_info: - self.event['commands']['segment_split_info'] = { - 'command': { - 'offset': binary.segment_split_info.command_offset, - 'size': binary.segment_split_info.size, + self.event["commands"]["segment_split_info"] = { + "command": { + "offset": binary.segment_split_info.command_offset, + "size": binary.segment_split_info.size, }, - 'data': { - 'offset': binary.segment_split_info.data_offset, - 'size': binary.segment_split_info.data_size, + "data": { + "offset": binary.segment_split_info.data_offset, + "size": binary.segment_split_info.data_size, }, } if binary.has_source_version: - self.event['commands']['source_version'] = { - 'command': { - 'offset': binary.source_version.command_offset, - 'size': binary.source_version.size, + self.event["commands"]["source_version"] = { + "command": { + "offset": binary.source_version.command_offset, + "size": binary.source_version.size, }, - 'version': '.'.join([str(v) for v in binary.source_version.version]), + "version": ".".join([str(v) for v in binary.source_version.version]), } if binary.has_sub_framework: - self.event['commands']['sub_framework'] = { - 'command': { - 'offset': binary.sub_framework.command_offset, - 'size': binary.sub_framework.size, + self.event["commands"]["sub_framework"] = { + "command": { + "offset": binary.sub_framework.command_offset, + "size": binary.sub_framework.size, }, } if binary.has_symbol_command: - self.event['commands']['symbol'] = { - 'command': { - 'offset': binary.symbol_command.command_offset, - 'size': binary.symbol_command.size, 
+ self.event["commands"]["symbol"] = { + "command": { + "offset": binary.symbol_command.command_offset, + "size": binary.symbol_command.size, }, - 'strings': { - 'offset': binary.symbol_command.strings_offset, - 'size': binary.symbol_command.strings_size, + "strings": { + "offset": binary.symbol_command.strings_offset, + "size": binary.symbol_command.strings_size, }, - 'symbol': { - 'offset': binary.symbol_command.symbol_offset, + "symbol": { + "offset": binary.symbol_command.symbol_offset, }, } if binary.has_thread_command: - self.event['commands']['thread'] = { - 'command': { - 'offset': binary.thread_command.command_offset, - 'size': binary.thread_command.size, + self.event["commands"]["thread"] = { + "command": { + "offset": binary.thread_command.command_offset, + "size": binary.thread_command.size, }, } if binary.has_uuid: - self.event['commands']['uuid'] = { - 'command': { - 'offset': binary.uuid.command_offset, - 'size': binary.uuid.size, + self.event["commands"]["uuid"] = { + "command": { + "offset": binary.uuid.command_offset, + "size": binary.uuid.size, }, - 'uuid': ''.join([str(u) for u in binary.uuid.uuid]), + "uuid": "".join([str(u) for u in binary.uuid.uuid]), } if binary.has_version_min: - self.event['commands']['version_min'] = { - 'command': { - 'offset': binary.version_min.command_offset, - 'size': binary.version_min.size, + self.event["commands"]["version_min"] = { + "command": { + "offset": binary.version_min.command_offset, + "size": binary.version_min.size, }, - 'version': '.'.join([str(v) for v in binary.version_min.version]), - 'sdk': '.'.join([str(s) for s in binary.version_min.sdk]), + "version": ".".join([str(v) for v in binary.version_min.version]), + "sdk": ".".join([str(s) for s in binary.version_min.sdk]), } diff --git a/src/python/strelka/scanners/scan_manifest.py b/src/python/strelka/scanners/scan_manifest.py index d8e4d5e1..1af6c15d 100644 --- a/src/python/strelka/scanners/scan_manifest.py +++ b/src/python/strelka/scanners/scan_manifest.py @@ -1,17 +1,23 @@ -from strelka import strelka import json +from strelka import strelka + class ScanManifest(strelka.Scanner): - """Parses browser extension's manifest.json. 
- """ + """Parses browser extension's manifest.json.""" def scan(self, data, file, options, expire_at): try: jsondata = json.loads(data) - required_keys = ['name', 'manifest_version', 'version'] - optional_keys = ['content_scripts', 'content_security_policy', 'description', 'permissions', 'update_url', - 'key'] + required_keys = ["name", "manifest_version", "version"] + optional_keys = [ + "content_scripts", + "content_security_policy", + "description", + "permissions", + "update_url", + "key", + ] for key in required_keys: self.event[key] = jsondata[key] for key in optional_keys: @@ -20,5 +26,5 @@ def scan(self, data, file, options, expire_at): except strelka.ScannerTimeout: raise except Exception: - self.flags.append('error parsing manifest') + self.flags.append("error parsing manifest") return diff --git a/src/python/strelka/scanners/scan_mmbot.py b/src/python/strelka/scanners/scan_mmbot.py index bee29f01..bc573cd0 100644 --- a/src/python/strelka/scanners/scan_mmbot.py +++ b/src/python/strelka/scanners/scan_mmbot.py @@ -1,10 +1,9 @@ import json import grpc +from strelka.proto import mmbot_pb2, mmbot_pb2_grpc from strelka import strelka -from strelka.proto import mmbot_pb2 -from strelka.proto import mmbot_pb2_grpc class ScanMmbot(strelka.Scanner): @@ -14,29 +13,30 @@ class ScanMmbot(strelka.Scanner): server: Network address and network port of the mmrpc service. Defaults to strelka_mmrpc_1:33907. """ + def scan(self, data, file, options, expire_at): - server = options.get('server', 'strelka_mmrpc_1:33907') + server = options.get("server", "strelka_mmrpc_1:33907") with grpc.insecure_channel(server) as channel: stub = mmbot_pb2_grpc.MmbotStub(channel) response = stub.SendVba(mmbot_pb2.Vba(vba=data.decode())) mmb_dict = json.loads(response.prediction) - self.event['confidence'] = mmb_dict.get('confidence', None) - self.event['prediction'] = mmb_dict.get('prediction', None) - self.event['functions'] = mmb_dict.get('function_names', None) - self.event['features'] = mmb_dict.get('vba_lang_features', None) - self.event['total'] = { - 'comments': mmb_dict.get('vba_cnt_comments', None), - 'functions': mmb_dict.get('vba_cnt_functions', None), - 'locations': mmb_dict.get('vba_cnt_loc', None), + self.event["confidence"] = mmb_dict.get("confidence", None) + self.event["prediction"] = mmb_dict.get("prediction", None) + self.event["functions"] = mmb_dict.get("function_names", None) + self.event["features"] = mmb_dict.get("vba_lang_features", None) + self.event["total"] = { + "comments": mmb_dict.get("vba_cnt_comments", None), + "functions": mmb_dict.get("vba_cnt_functions", None), + "locations": mmb_dict.get("vba_cnt_loc", None), } - self.event['ratio'] = { - 'comments': mmb_dict.get('vba_cnt_comment_loc_ratio', None), - 'functions': mmb_dict.get('vba_cnt_func_loc_ratio', None), + self.event["ratio"] = { + "comments": mmb_dict.get("vba_cnt_comment_loc_ratio", None), + "functions": mmb_dict.get("vba_cnt_func_loc_ratio", None), } - self.event['entropy'] = { - 'characters': mmb_dict.get('vba_entropy_chars', None), - 'functions': mmb_dict.get('vba_entropy_func_names', None), - 'words': mmb_dict.get('vba_entropy_words', None), + self.event["entropy"] = { + "characters": mmb_dict.get("vba_entropy_chars", None), + "functions": mmb_dict.get("vba_entropy_func_names", None), + "words": mmb_dict.get("vba_entropy_words", None), } diff --git a/src/python/strelka/scanners/scan_msi.py b/src/python/strelka/scanners/scan_msi.py index 71c3cd4f..081c5d9d 100644 --- a/src/python/strelka/scanners/scan_msi.py +++ 
b/src/python/strelka/scanners/scan_msi.py @@ -17,10 +17,10 @@ class ScanMsi(strelka.Scanner): def scan(self, data, file, options, expire_at): # Get a list of keys to collect from the MSI file - keys = options.get('keys', []) + keys = options.get("keys", []) # Get the temporary directory to write the MSI file to - tmp_directory = options.get('tmp_directory', '/tmp/') + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: # Write the MSI data to the temporary file @@ -30,7 +30,7 @@ def scan(self, data, file, options, expire_at): # Run exiftool to extract metadata from the file try: (stdout, stderr) = subprocess.Popen( - ['exiftool', '-d', '"%s"', '-j', tmp_data.name], + ["exiftool", "-d", '"%s"', "-j", tmp_data.name], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, ).communicate() @@ -38,7 +38,7 @@ def scan(self, data, file, options, expire_at): raise except Exception as e: # Handle any exceptions raised while running exiftool - self.flags.append(f'msi_extract_error: {e}') + self.flags.append(f"msi_extract_error: {e}") return if stdout: @@ -47,7 +47,7 @@ def scan(self, data, file, options, expire_at): exiftool_dictionary = json.loads(stdout)[0] except ValueError as e: # Handle any errors while parsing the JSON output - self.flags.append(f'msi_parse_error: {e}') + self.flags.append(f"msi_parse_error: {e}") return for k, v in exiftool_dictionary.items(): diff --git a/src/python/strelka/scanners/scan_nf.py b/src/python/strelka/scanners/scan_nf.py index 2065e5b1..258d292f 100644 --- a/src/python/strelka/scanners/scan_nf.py +++ b/src/python/strelka/scanners/scan_nf.py @@ -1,7 +1,9 @@ -from strelka import strelka import cv2 import numpy as np +from strelka import strelka + + class ScanNf(strelka.Scanner): """ Converts RGB image into the HSV (Hue, Saturation, Value) Color Space @@ -16,6 +18,7 @@ class ScanNf(strelka.Scanner): The higher the value for both variables, the more strict the algorithm is. """ + def scan(self, data, file, options, expire_at): try: # Convert image to HSV color space @@ -28,15 +31,17 @@ def scan(self, data, file, options, expire_at): # Calculate percentage of pixels with saturation >= p p = 0.05 - s_perc = float(np.sum(s[int(p * 255.0):-1])) / float(np.prod(image.shape[0:2])) + s_perc = float(np.sum(s[int(p * 255.0) : -1])) / float( + np.prod(image.shape[0:2]) + ) # Percentage threshold; above: valid image, below: noise s_thr = 0.25 - self.event['percentage'] = s_perc - self.event['threshold'] = s_thr + self.event["percentage"] = s_perc + self.event["threshold"] = s_thr if s_perc < s_thr: - self.event['noise_floor'] = True # Potentially dangerous + self.event["noise_floor"] = True # Potentially dangerous else: - self.event['noise_floor'] = False # Not dangerous + self.event["noise_floor"] = False # Not dangerous except cv2.error: self.flags.append("cv2_image_error") diff --git a/src/python/strelka/scanners/scan_ocr.py b/src/python/strelka/scanners/scan_ocr.py index d614695b..30f0ae59 100644 --- a/src/python/strelka/scanners/scan_ocr.py +++ b/src/python/strelka/scanners/scan_ocr.py @@ -15,10 +15,11 @@ class ScanOcr(strelka.Scanner): tmp_directory: Location where tempfile writes temporary files. Defaults to '/tmp/'. 
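A standalone sketch of the exiftool call in `ScanMsi` above: `-j` makes exiftool print a JSON array with one object per input file, so index `[0]` holds this file's metadata (`-d` supplies a strftime-style date format):

```python
import json
import subprocess
import tempfile

def exiftool_metadata(data: bytes, tmp_directory: str = "/tmp/") -> dict:
    with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
        tmp_data.write(data)
        tmp_data.flush()
        stdout, _ = subprocess.Popen(
            ["exiftool", "-d", '"%s"', "-j", tmp_data.name],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        ).communicate()
    return json.loads(stdout)[0]
```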
""" + def scan(self, data, file, options, expire_at): - extract_text = options.get('extract_text', False) - split_words = options.get('split_words', True) - tmp_directory = options.get('tmp_directory', '/tmp/') + extract_text = options.get("extract_text", False) + split_words = options.get("split_words", True) + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: tmp_data.write(data) @@ -26,31 +27,37 @@ def scan(self, data, file, options, expire_at): with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_tess: try: - tess_txt_name = f'{tmp_tess.name}.txt' + tess_txt_name = f"{tmp_tess.name}.txt" completed_process = subprocess.run( - ['tesseract', tmp_data.name, tmp_tess.name], + ["tesseract", tmp_data.name, tmp_tess.name], capture_output=True, - check=True + check=True, ) - with open(tess_txt_name, 'rb') as tess_txt: + _ = completed_process + + with open(tess_txt_name, "rb") as tess_txt: ocr_file = tess_txt.read() if ocr_file: if split_words: - self.event['text'] = ocr_file.split() + self.event["text"] = ocr_file.split() else: - self.event['text'] = ocr_file.replace(b'\r', b'').replace(b'\n', b'').replace(b'\f', b'') + self.event["text"] = ( + ocr_file.replace(b"\r", b"") + .replace(b"\n", b"") + .replace(b"\f", b"") + ) if extract_text: # Send extracted file back to Strelka - self.emit_file(ocr_file, name='text') + self.emit_file(ocr_file, name="text") os.remove(tess_txt_name) except subprocess.CalledProcessError as e: - self.flags.append('tesseract_process_error') + self.flags.append("tesseract_process_error") raise strelka.ScannerException(e.stderr) diff --git a/src/python/strelka/scanners/scan_ole.py b/src/python/strelka/scanners/scan_ole.py index 32673b7b..1f92491f 100644 --- a/src/python/strelka/scanners/scan_ole.py +++ b/src/python/strelka/scanners/scan_ole.py @@ -8,27 +8,30 @@ class ScanOle(strelka.Scanner): """Extracts files from OLECF files.""" + def scan(self, data, file, options, expire_at): - self.event['total'] = {'streams': 0, 'extracted': 0} + self.event["total"] = {"streams": 0, "extracted": 0} try: ole = olefile.OleFileIO(data) ole_streams = ole.listdir(streams=True) - self.event['total']['streams'] = len(ole_streams) + self.event["total"]["streams"] = len(ole_streams) for stream in ole_streams: try: file = ole.openstream(stream) extract_data = file.read() extract_name = f'{"_".join(stream)}' - extract_name = re.sub(r'[\x00-\x1F]', '', extract_name) - if extract_name.endswith('Ole10Native'): + extract_name = re.sub(r"[\x00-\x1F]", "", extract_name) + if extract_name.endswith("Ole10Native"): native_stream = oletools.oleobj.OleNativeStream( bindata=extract_data, ) if native_stream.filename: - extract_name = extract_name + f'_{str(native_stream.filename)}' + extract_name = ( + extract_name + f"_{str(native_stream.filename)}" + ) else: - extract_name = extract_name + '_native_data' + extract_name = extract_name + "_native_data" # Send extracted file back to Strelka self.emit_file(native_stream.data, name=extract_name) @@ -37,12 +40,12 @@ def scan(self, data, file, options, expire_at): # Send extracted file back to Strelka self.emit_file(extract_data, name=extract_name) - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 except AttributeError: self.flags.append("attribute_error_in_stream") except OSError: - self.flags.append('os_error') + self.flags.append("os_error") finally: # TODO this should be wrapped with another try / catch as the variable assignment is not guaranteed 
ole.close() diff --git a/src/python/strelka/scanners/scan_pcap.py b/src/python/strelka/scanners/scan_pcap.py index edeb4e1a..45c62e80 100644 --- a/src/python/strelka/scanners/scan_pcap.py +++ b/src/python/strelka/scanners/scan_pcap.py @@ -14,6 +14,7 @@ class ScanPcap(strelka.Scanner): limit: Maximum number of files to extract. Defaults to 1000. """ + def scan(self, data, file, options, expire_at): file_limit = options.get("limit", 1000) tmp_directory = options.get("tmp_file_directory", "/tmp/") @@ -39,22 +40,28 @@ def scan(self, data, file, options, expire_at): try: (stdout, stderr) = subprocess.Popen( - ["zeek", - "-r", - tmp_data.name, - "/opt/zeek/share/zeek/policy/frameworks/files/extract-all-files.zeek", - f"FileExtract::prefix={tmp_extract}", - "LogAscii::use_json=T"], + [ + "zeek", + "-r", + tmp_data.name, + "/opt/zeek/share/zeek/policy/frameworks/files/extract-all-files.zeek", + f"FileExtract::prefix={tmp_extract}", + "LogAscii::use_json=T", + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=tmp_extract + cwd=tmp_extract, ).communicate(timeout=scanner_timeout) if os.path.exists(os.path.join(tmp_extract, "files.log")): - with open(os.path.join(tmp_extract, "files.log"), "r") as json_file: + with open( + os.path.join(tmp_extract, "files.log"), "r" + ) as json_file: # files.log is one JSON object per line, convert to array - file_events = json.loads("[" + ",".join(json_file.read().splitlines()) + "]") + file_events = json.loads( + "[" + ",".join(json_file.read().splitlines()) + "]" + ) for file_event in file_events: @@ -65,7 +72,9 @@ def scan(self, data, file, options, expire_at): self.event["total"]["files"] += 1 self.event["files"].append(file_event) - extracted_file_path = os.path.join(tmp_extract, file_event["extracted"]) + extracted_file_path = os.path.join( + tmp_extract, file_event["extracted"] + ) try: if os.path.exists(extracted_file_path): diff --git a/src/python/strelka/scanners/scan_pdf.py b/src/python/strelka/scanners/scan_pdf.py index 550a1cf4..c3af1c57 100644 --- a/src/python/strelka/scanners/scan_pdf.py +++ b/src/python/strelka/scanners/scan_pdf.py @@ -4,8 +4,10 @@ import datetime import io import re -import fitz from collections import Counter + +import fitz + from strelka import strelka # hide PyMuPDF warnings @@ -15,53 +17,62 @@ flags=0, ) + class ScanPdf(strelka.Scanner): """Collects metadata and extracts files from PDF files.""" @staticmethod def _convert_timestamp(timestamp): try: - return str(datetime.datetime.strptime(timestamp.replace("'", ""), "D:%Y%m%d%H%M%S%z")) + return str( + datetime.datetime.strptime( + timestamp.replace("'", ""), "D:%Y%m%d%H%M%S%z" + ) + ) except strelka.ScannerTimeout: raise except Exception: return def scan(self, data, file, options, expire_at): - self.event['images'] = 0 - self.event['lines'] = 0 - self.event['links'] = [] - self.event['words'] = 0 + self.event["images"] = 0 + self.event["lines"] = 0 + self.event["links"] = [] + self.event["words"] = 0 keys = list() try: with io.BytesIO(data) as pdf_io: - reader = fitz.open(stream=pdf_io, filetype='pdf') + reader = fitz.open(stream=pdf_io, filetype="pdf") # collect metadata - self.event['author'] = reader.metadata['author'] - self.event['creator'] = reader.metadata['creator'] - self.event['creation_date'] = self._convert_timestamp(reader.metadata['creationDate']) - self.event['dirty'] = reader.is_dirty - self.event['embedded_files'] = { - 'count': reader.embfile_count(), - 'names': reader.embfile_names() + self.event["author"] = reader.metadata["author"] + 
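On the `files.log` handling in `ScanPcap` above: with `LogAscii::use_json=T`, Zeek writes one JSON object per line, so the scanner joins the lines into a single JSON array before parsing. The same trick in isolation (field names illustrative):

```python
import json

log = b'{"fuid": "F1", "extracted": "extract-1"}\n{"fuid": "F2", "extracted": "extract-2"}\n'

file_events = json.loads("[" + ",".join(log.decode().splitlines()) + "]")
print([e["extracted"] for e in file_events])  # ['extract-1', 'extract-2']
```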
self.event["creator"] = reader.metadata["creator"] + self.event["creation_date"] = self._convert_timestamp( + reader.metadata["creationDate"] + ) + self.event["dirty"] = reader.is_dirty + self.event["embedded_files"] = { + "count": reader.embfile_count(), + "names": reader.embfile_names(), } - self.event['encrypted'] = reader.is_encrypted - self.event['needs_pass'] = reader.needs_pass - self.event['format'] = reader.metadata['format'] - self.event['keywords'] = reader.metadata['keywords'] - self.event['language'] = reader.language - self.event['modify_date'] = self._convert_timestamp(reader.metadata['modDate']) - self.event['old_xrefs'] = reader.has_old_style_xrefs - self.event['pages'] = reader.page_count - self.event['producer'] = reader.metadata['producer'] - self.event['repaired'] = reader.is_repaired - self.event['subject'] = reader.metadata['subject'] - self.event['title'] = reader.metadata['title'] - self.event['xrefs'] = reader.xref_length() - 1 - - #collect phones + self.event["encrypted"] = reader.is_encrypted + self.event["needs_pass"] = reader.needs_pass + self.event["format"] = reader.metadata["format"] + self.event["keywords"] = reader.metadata["keywords"] + self.event["language"] = reader.language + self.event["modify_date"] = self._convert_timestamp( + reader.metadata["modDate"] + ) + self.event["old_xrefs"] = reader.has_old_style_xrefs + self.event["pages"] = reader.page_count + self.event["producer"] = reader.metadata["producer"] + self.event["repaired"] = reader.is_repaired + self.event["subject"] = reader.metadata["subject"] + self.event["title"] = reader.metadata["title"] + self.event["xrefs"] = reader.xref_length() - 1 + + # collect phones phones = [] for i in range(self.event["pages"]): phones.extend( @@ -69,9 +80,7 @@ def scan(self, data, file, options, expire_at): re.sub("[^0-9]", "", x) for x in re.findall( phone_numbers, - reader.get_page_text(i).replace( - "\t", " " - ), + reader.get_page_text(i).replace("\t", " "), ) ] ) @@ -80,13 +89,13 @@ def scan(self, data, file, options, expire_at): # iterate through xref objects for xref in range(1, reader.xref_length()): xref_object = reader.xref_object(xref, compressed=True) - for obj in options.get('objects', []): + for obj in options.get("objects", []): pattern = f"/{obj}" if pattern in xref_object: keys.append(obj.lower()) # extract urls from xref - self.event['links'].extend(re.findall('\"(https?://.*?)\"', xref_object)) - self.event['objects'] = dict(Counter(keys)) + self.event["links"].extend(re.findall('"(https?://.*?)"', xref_object)) + self.event["objects"] = dict(Counter(keys)) # submit embedded files to strelka try: @@ -94,7 +103,7 @@ def scan(self, data, file, options, expire_at): props = reader.embfile_info(i) # Send extracted file back to Strelka - self.emit_file(reader.embfile_get(i), name=props['filename']) + self.emit_file(reader.embfile_get(i), name=props["filename"]) except strelka.ScannerTimeout: raise @@ -105,7 +114,7 @@ def scan(self, data, file, options, expire_at): try: for i in range(len(reader)): for img in reader.get_page_images(i): - self.event['images'] += 1 + self.event["images"] += 1 pix = fitz.Pixmap(reader, img[0]) # Send extracted file back to Strelka @@ -120,11 +129,13 @@ def scan(self, data, file, options, expire_at): try: text = "" for page in reader: - self.event['lines'] += len(page.get_text().split('\n')) - self.event['words'] += len(list(filter(None, page.get_text().split(' ')))) + self.event["lines"] += len(page.get_text().split("\n")) + self.event["words"] += len( + 
list(filter(None, page.get_text().split(" "))) + ) # extract links for link in page.get_links(): - self.event['links'].append(link.get('uri')) + self.event["links"].append(link.get("uri")) text += page.get_text() diff --git a/src/python/strelka/scanners/scan_pe.py b/src/python/strelka/scanners/scan_pe.py index 7c767508..ff705671 100644 --- a/src/python/strelka/scanners/scan_pe.py +++ b/src/python/strelka/scanners/scan_pe.py @@ -2,218 +2,228 @@ import binascii import datetime import hashlib -import pefile + +# Disable Signifiy Debugging Logging +import logging import struct from io import BytesIO -from signify.exceptions import * + +import pefile from signify.authenticode import SignedPEFile +from signify.exceptions import ( + AuthenticodeParseError, + AuthenticodeVerificationError, + CertificateVerificationError, + SignedPEParseError, + SignerInfoParseError, + SignerInfoVerificationError, + VerificationError, +) + from strelka import strelka -# Disable Signifiy Debugging Logging -import logging -logger = logging.getLogger('signify') +logger = logging.getLogger("signify") logger.propagate = False +# Ref: https://ebourg.github.io/jsign/apidocs/src-html/net/jsign/pe/SectionFlag.html + CHARACTERISTICS_DLL = { - 0x0020: 'HIGH_ENTROPY_VA', - 0x0040: 'DYNAMIC_BASE', - 0x0080: 'FORCE_INTEGRITY', - 0x0100: 'NX_COMPAT', - 0x0200: 'NO_ISOLATION', - 0x0400: 'NO_SEH', - 0x0800: 'NO_BIND', - 0x1000: 'APPCONTAINER', - 0x2000: 'WDM_DRIVER', - 0x4000: 'GUARD_CF', - 0x8000: 'TERMINAL_SERVER_AWARE', + 0x0020: "HIGH_ENTROPY_VA", + 0x0040: "DYNAMIC_BASE", + 0x0080: "FORCE_INTEGRITY", + 0x0100: "NX_COMPAT", + 0x0200: "NO_ISOLATION", + 0x0400: "NO_SEH", + 0x0800: "NO_BIND", + 0x1000: "APPCONTAINER", + 0x2000: "WDM_DRIVER", + 0x4000: "GUARD_CF", + 0x8000: "TERMINAL_SERVER_AWARE", } CHARACTERISTICS_IMAGE = { - 0x0001: 'RELOCS_STRIPPED', - 0x0002: 'EXECUTABLE_IMAGE', - 0x0004: 'LINE_NUMS_STRIPPED', - 0x0008: 'LOCAL_SYMS_STRIPPED', - 0x0010: 'AGGRESIVE_WS_TRIM', - 0x0020: 'LARGE_ADDRESS_AWARE', - 0x0040: '16BIT_MACHINE', - 0x0080: 'BYTES_REVERSED_LO', - 0x0100: '32BIT_MACHINE', - 0x0200: 'DEBUG_STRIPPED', - 0x0400: 'REMOVABLE_RUN_FROM_SWAP', - 0x0800: 'NET_RUN_FROM_SWAP', - 0x1000: 'SYSTEM', - 0x2000: 'DLL', - 0x4000: 'UP_SYSTEM_ONLY', - 0x8000: 'BYTES_REVERSED_HI', + 0x0001: "RELOCS_STRIPPED", + 0x0002: "EXECUTABLE_IMAGE", + 0x0004: "LINE_NUMS_STRIPPED", + 0x0008: "LOCAL_SYMS_STRIPPED", + 0x0010: "AGGRESIVE_WS_TRIM", + 0x0020: "LARGE_ADDRESS_AWARE", + 0x0040: "16BIT_MACHINE", + 0x0080: "BYTES_REVERSED_LO", + 0x0100: "32BIT_MACHINE", + 0x0200: "DEBUG_STRIPPED", + 0x0400: "REMOVABLE_RUN_FROM_SWAP", + 0x0800: "NET_RUN_FROM_SWAP", + 0x1000: "SYSTEM", + 0x2000: "DLL", + 0x4000: "UP_SYSTEM_ONLY", + 0x8000: "BYTES_REVERSED_HI", } CHARACTERISTICS_SECTION = { - 0x00000000: 'TYPE_REG', - 0x00000001: 'TYPE_DSECT', - 0x00000002: 'TYPE_NOLOAD', - 0x00000004: 'TYPE_GROUP', - 0x00000008: 'TYPE_NO_PAD', - 0x00000010: 'TYPE_COPY', - 0x00000020: 'CNT_CODE', - 0x00000040: 'CNT_INITIALIZED_DATA', - 0x00000080: 'CNT_UNINITIALIZED_DATA', - 0x00000100: 'LNK_OTHER', - 0x00000200: 'LNK_INFO', - 0x00000400: 'LNK_OVER', - 0x00000800: 'LNK_REMOVE', - 0x00001000: 'LNK_COMDAT', - 0x00004000: 'MEM_PROTECTED', - 0x00004000: 'NO_DEFER_SPEC_EXC', - 0x00008000: 'GPREL', - 0x00008000: 'MEM_FARDATA', - 0x00010000: 'MEM_SYSHEAP', - 0x00020000: 'MEM_PURGEABLE', - 0x00020000: 'MEM_16BIT', - 0x00040000: 'MEM_LOCKED', - 0x00080000: 'MEM_PRELOAD', - 0x00100000: 'ALIGN_1BYTES', - 0x00200000: 'ALIGN_2BYTES', - 0x00300000: 'ALIGN_4BYTES', - 0x00400000: 
'ALIGN_8BYTES', - 0x00500000: 'ALIGN_16BYTES', - 0x00600000: 'ALIGN_32BYTES', - 0x00700000: 'ALIGN_64BYTES', - 0x00800000: 'ALIGN_128BYTES', - 0x00900000: 'ALIGN_256BYTES', - 0x00A00000: 'ALIGN_512BYTES', - 0x00B00000: 'ALIGN_1024BYTES', - 0x00C00000: 'ALIGN_2048BYTES', - 0x00D00000: 'ALIGN_4096BYTES', - 0x00E00000: 'ALIGN_8192BYTES', - 0x00F00000: 'ALIGN_MASK', - 0x01000000: 'LNK_NRELOC_OVFL', - 0x02000000: 'MEM_DISCARDABLE', - 0x04000000: 'MEM_NOT_CACHED', - 0x08000000: 'MEM_NOT_PAGED', - 0x10000000: 'MEM_SHARED', - 0x20000000: 'MEM_EXECUTE', - 0x40000000: 'MEM_READ', - 0x80000000: 'MEM_WRITE', + 0x00000000: "TYPE_REG", + 0x00000001: "TYPE_DSECT", + 0x00000002: "TYPE_NOLOAD", + 0x00000004: "TYPE_GROUP", + 0x00000008: "TYPE_NO_PAD", + 0x00000010: "TYPE_COPY", + 0x00000020: "CNT_CODE", + 0x00000040: "CNT_INITIALIZED_DATA", + 0x00000080: "CNT_UNINITIALIZED_DATA", + 0x00000100: "LNK_OTHER", + 0x00000200: "LNK_INFO", + 0x00000400: "LNK_OVER", + 0x00000800: "LNK_REMOVE", + 0x00001000: "LNK_COMDAT", + 0x00004000: "MEM_PROTECTED|NO_DEFER_SPEC_EXC", + 0x00008000: "GPREL|MEM_FARDATA", + 0x00010000: "MEM_SYSHEAP", + 0x00020000: "MEM_PURGEABLE|MEM_16BIT", + 0x00040000: "MEM_LOCKED", + 0x00080000: "MEM_PRELOAD", + 0x00100000: "ALIGN_1BYTES", + 0x00200000: "ALIGN_2BYTES", + 0x00300000: "ALIGN_4BYTES", + 0x00400000: "ALIGN_8BYTES", + 0x00500000: "ALIGN_16BYTES", + 0x00600000: "ALIGN_32BYTES", + 0x00700000: "ALIGN_64BYTES", + 0x00800000: "ALIGN_128BYTES", + 0x00900000: "ALIGN_256BYTES", + 0x00A00000: "ALIGN_512BYTES", + 0x00B00000: "ALIGN_1024BYTES", + 0x00C00000: "ALIGN_2048BYTES", + 0x00D00000: "ALIGN_4096BYTES", + 0x00E00000: "ALIGN_8192BYTES", + 0x00F00000: "ALIGN_MASK", + 0x01000000: "LNK_NRELOC_OVFL", + 0x02000000: "MEM_DISCARDABLE", + 0x04000000: "MEM_NOT_CACHED", + 0x08000000: "MEM_NOT_PAGED", + 0x10000000: "MEM_SHARED", + 0x20000000: "MEM_EXECUTE", + 0x40000000: "MEM_READ", + 0x80000000: "MEM_WRITE", } # https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-tagvs_fixedfileinfo FIXED_FILE_INFO_FLAGS = { - 0x00000001: 'DEBUG', - 0x00000010: 'INFOINFERRED', - 0x00000004: 'PATCHED', - 0x00000002: 'PRERELEASE', - 0x00000008: 'PRIVATEBUILD', - 0x00000020: 'SPECIALBUILD', + 0x00000001: "DEBUG", + 0x00000010: "INFOINFERRED", + 0x00000004: "PATCHED", + 0x00000002: "PRERELEASE", + 0x00000008: "PRIVATEBUILD", + 0x00000020: "SPECIALBUILD", } FIXED_FILE_INFO_OS = { - 0x00000000: 'UNKNOWN', - 0x00000001: 'WINDOWS16', - 0x00000002: 'PM16', - 0x00000003: 'PM32', - 0x00000004: 'WINDOWS32', - 0x00010000: 'DOS', - 0x00040000: 'NT', - 0x00020000: 'OS216', - 0x00030000: 'OS232', + 0x00000000: "UNKNOWN", + 0x00000001: "WINDOWS16", + 0x00000002: "PM16", + 0x00000003: "PM32", + 0x00000004: "WINDOWS32", + 0x00010000: "DOS", + 0x00040000: "NT", + 0x00020000: "OS216", + 0x00030000: "OS232", } FIXED_FILE_INFO_SUBTYPE = { - (0x00000003, 0x00000000): 'UNKNOWN', - (0x00000003, 0x00000001): 'DRV_PRINTER', - (0x00000003, 0x00000002): 'DRV_KEYBOARD', - (0x00000003, 0x00000003): 'DRV_LANGUAGE', - (0x00000003, 0x00000004): 'DRV_DISPLAY', - (0x00000003, 0x00000005): 'DRV_MOUSE', - (0x00000003, 0x00000006): 'DRV_NETWORK', - (0x00000003, 0x00000007): 'DRV_SYSTEM', - (0x00000003, 0x00000008): 'DRV_INSTALLABLE', - (0x00000003, 0x00000009): 'DRV_SOUND', - (0x00000003, 0x0000000A): 'DRV_COMM', - (0x00000003, 0x0000000C): 'DRV_VERSIONED_PRINTER', - (0x00000004, 0x00000000): 'UNKNOWN', - (0x00000004, 0x00000001): 'FONT_RASTER', - (0x00000004, 0x00000002): 'FONT_VECTOR', - (0x00000004, 0x00000003): 'FONT_TRUETYPE', + 
(0x00000003, 0x00000000): "UNKNOWN", + (0x00000003, 0x00000001): "DRV_PRINTER", + (0x00000003, 0x00000002): "DRV_KEYBOARD", + (0x00000003, 0x00000003): "DRV_LANGUAGE", + (0x00000003, 0x00000004): "DRV_DISPLAY", + (0x00000003, 0x00000005): "DRV_MOUSE", + (0x00000003, 0x00000006): "DRV_NETWORK", + (0x00000003, 0x00000007): "DRV_SYSTEM", + (0x00000003, 0x00000008): "DRV_INSTALLABLE", + (0x00000003, 0x00000009): "DRV_SOUND", + (0x00000003, 0x0000000A): "DRV_COMM", + (0x00000003, 0x0000000C): "DRV_VERSIONED_PRINTER", + (0x00000004, 0x00000000): "UNKNOWN", + (0x00000004, 0x00000001): "FONT_RASTER", + (0x00000004, 0x00000002): "FONT_VECTOR", + (0x00000004, 0x00000003): "FONT_TRUETYPE", } FIXED_FILE_INFO_TYPE = { - 0x00000000: 'UNKNOWN', - 0x00000001: 'APP', - 0x00000002: 'DLL', - 0x00000003: 'DRV', - 0x00000004: 'FONT', - 0x00000005: 'VXD', - 0x00000007: 'STATIC_LIB', + 0x00000000: "UNKNOWN", + 0x00000001: "APP", + 0x00000002: "DLL", + 0x00000003: "DRV", + 0x00000004: "FONT", + 0x00000005: "VXD", + 0x00000007: "STATIC_LIB", } MAGIC_DOS = { - 0x5A4D: 'DOS', - 0x4D5A: 'DOSZM', - 0x454E: 'NE', - 0x454C: 'LE', - 0x584C: 'LX', - 0x5A56: 'TE', - 0x00004550: 'NT', + 0x5A4D: "DOS", + 0x4D5A: "DOSZM", + 0x454E: "NE", + 0x454C: "LE", + 0x584C: "LX", + 0x5A56: "TE", + 0x00004550: "NT", } MAGIC_IMAGE = { - 0x10b: '32_BIT', - 0x20b: '64_BIT', - 0x107: 'ROM_IMAGE', + 0x10B: "32_BIT", + 0x20B: "64_BIT", + 0x107: "ROM_IMAGE", } VAR_FILE_INFO_LANGS = { - 0x0401: 'Arabic', - 0x0415: 'Polish', - 0x0402: 'Bulgarian', - 0x0416: 'Portuguese (Brazil)', - 0x0403: 'Catalan', - 0x0417: 'Rhaeto-Romanic', - 0x0404: 'Traditional Chinese', - 0x0418: 'Romanian', - 0x0405: 'Czech', - 0x0419: 'Russian', - 0x0406: 'Danish', - 0x041A: 'Croato-Serbian (Latin)', - 0x0407: 'German', - 0x041B: 'Slovak', - 0x0408: 'Greek', - 0x041C: 'Albanian', - 0x0409: 'U.S. English', - 0x041D: 'Swedish', - 0x040A: 'Castilian Spanish', - 0x041E: 'Thai', - 0x040B: 'Finnish', - 0x041F: 'Turkish', - 0x040C: 'French', - 0x0420: 'Urdu', - 0x040D: 'Hebrew', - 0x0421: 'Bahasa', - 0x040E: 'Hungarian', - 0x0804: 'Simplified Chinese', - 0x040F: 'Icelandic', - 0x0807: 'Swiss German', - 0x0410: 'Italian', - 0x0809: 'U.K. English', - 0x0411: 'Japanese', - 0x080A: 'Spanish (Mexico)', - 0x0412: 'Korean', - 0x080C: 'Belgian French', - 0x0413: 'Dutch', - 0x0C0C: 'Canadian French', - 0x0414: 'Norwegian – Bokmal', - 0x100C: 'Swiss French', - 0x0810: 'Swiss Italian', - 0x0816: 'Portuguese (Portugal)', - 0x0813: 'Belgian Dutch', - 0x081A: 'Serbo-Croatian (Cyrillic)', - 0x0814: 'Norwegian – Nynorsk', + 0x0401: "Arabic", + 0x0415: "Polish", + 0x0402: "Bulgarian", + 0x0416: "Portuguese (Brazil)", + 0x0403: "Catalan", + 0x0417: "Rhaeto-Romanic", + 0x0404: "Traditional Chinese", + 0x0418: "Romanian", + 0x0405: "Czech", + 0x0419: "Russian", + 0x0406: "Danish", + 0x041A: "Croato-Serbian (Latin)", + 0x0407: "German", + 0x041B: "Slovak", + 0x0408: "Greek", + 0x041C: "Albanian", + 0x0409: "U.S. English", + 0x041D: "Swedish", + 0x040A: "Castilian Spanish", + 0x041E: "Thai", + 0x040B: "Finnish", + 0x041F: "Turkish", + 0x040C: "French", + 0x0420: "Urdu", + 0x040D: "Hebrew", + 0x0421: "Bahasa", + 0x040E: "Hungarian", + 0x0804: "Simplified Chinese", + 0x040F: "Icelandic", + 0x0807: "Swiss German", + 0x0410: "Italian", + 0x0809: "U.K. 
English", + 0x0411: "Japanese", + 0x080A: "Spanish (Mexico)", + 0x0412: "Korean", + 0x080C: "Belgian French", + 0x0413: "Dutch", + 0x0C0C: "Canadian French", + 0x0414: "Norwegian – Bokmal", + 0x100C: "Swiss French", + 0x0810: "Swiss Italian", + 0x0816: "Portuguese (Portugal)", + 0x0813: "Belgian Dutch", + 0x081A: "Serbo-Croatian (Cyrillic)", + 0x0814: "Norwegian – Nynorsk", } VAR_FILE_INFO_CHARS = { - 0: '7-bit ASCII', - 932: 'Japan (Shift – JIS X-0208)', - 949: 'Korea (Shift – KSC 5601)', - 950: 'Taiwan (Big5)', - 1200: 'Unicode', - 1250: 'Latin-2 (Eastern European)', - 1251: 'Cyrillic', - 1252: 'Multilingual', - 1253: 'Greek', - 1254: 'Turkish', - 1255: 'Hebrew', - 1256: 'Arabic', + 0: "7-bit ASCII", + 932: "Japan (Shift – JIS X-0208)", + 949: "Korea (Shift – KSC 5601)", + 950: "Taiwan (Big5)", + 1200: "Unicode", + 1250: "Latin-2 (Eastern European)", + 1251: "Cyrillic", + 1252: "Multilingual", + 1253: "Greek", + 1254: "Turkish", + 1255: "Hebrew", + 1256: "Arabic", } COMMON_FILE_INFO_NAMES = { "Assembly Version": "assembly_version", @@ -230,7 +240,7 @@ "OriginalFilename": "original_filename", "PrivateBuild": "private_build", "ProductName": "product_name", - "ProductVersion": "product_version" + "ProductVersion": "product_version", } @@ -238,20 +248,20 @@ def parse_rich(pe): try: if rich_data := pe.parse_rich_header(): rich_dict = { - 'key': rich_data['key'].hex(), - 'clear_data': { - 'data': base64.b64encode(rich_data['clear_data']), - 'md5': hashlib.md5(rich_data['clear_data']).hexdigest(), + "key": rich_data["key"].hex(), + "clear_data": { + "data": base64.b64encode(rich_data["clear_data"]), + "md5": hashlib.md5(rich_data["clear_data"]).hexdigest(), }, - 'raw_data': { - 'data': base64.b64encode(rich_data['raw_data']), - 'md5': hashlib.md5(rich_data['raw_data']).hexdigest(), + "raw_data": { + "data": base64.b64encode(rich_data["raw_data"]), + "md5": hashlib.md5(rich_data["raw_data"]).hexdigest(), }, } return rich_dict except pefile.PEFormatError: - return 'pe_format_error' + return "pe_format_error" def parse_certificates(data): @@ -268,8 +278,15 @@ def parse_certificates(data): raise except Exception: return "no_certs_found" - except (SignedPEParseError, SignerInfoParseError, AuthenticodeParseError, VerificationError, - CertificateVerificationError, SignerInfoVerificationError, AuthenticodeVerificationError) as e: + except ( + SignedPEParseError, + SignerInfoParseError, + AuthenticodeParseError, + VerificationError, + CertificateVerificationError, + SignerInfoVerificationError, + AuthenticodeVerificationError, + ): return "pe_certificate_error" cert_list = [] @@ -285,44 +302,56 @@ def parse_certificates(data): cert_dict = { "country_name": issuer.get("country_name"), "organization_name": issuer.get("organization_name"), - "organizational_unit_name": issuer.get("organizational_unit_name"), + "organizational_unit_name": issuer.get( + "organizational_unit_name" + ), "common_name": issuer.get("common_name"), "serial_number": str(cert.serial_number), "issuer_dn": cert.issuer.dn, "subject_dn": cert.subject.dn, "valid_from": cert.valid_from.isoformat(), "valid_to": cert.valid_to.isoformat(), - "signature_algorithim": str(cert.signature_algorithm['algorithm']) + "signature_algorithim": str( + cert.signature_algorithm["algorithm"] + ), } cert_list.append(cert_dict) except strelka.ScannerTimeout: raise - except Exception as e: + except Exception: return "exception parsing certificate exception" signer_dict = { - 'issuer_dn': signed_data.signer_info.issuer.dn, - 'serial_number': 
str(signed_data.signer_info.serial_number), - 'program_name': signed_data.signer_info.program_name, - 'more_info': signed_data.signer_info.more_info + "issuer_dn": signed_data.signer_info.issuer.dn, + "serial_number": str(signed_data.signer_info.serial_number), + "program_name": signed_data.signer_info.program_name, + "more_info": signed_data.signer_info.more_info, } # signer information signer_list.append(signer_dict) if signed_data.signer_info.countersigner: - if hasattr(signed_data.signer_info.countersigner, 'issuer'): - counter_signer_issuer_dn = signed_data.signer_info.countersigner.issuer.dn + if hasattr(signed_data.signer_info.countersigner, "issuer"): + counter_signer_issuer_dn = ( + signed_data.signer_info.countersigner.issuer.dn + ) else: - counter_signer_issuer_dn = signed_data.signer_info.countersigner.signer_info.issuer.dn - - if hasattr(signed_data.signer_info.countersigner, 'serial_number'): - counter_signer_sn = signed_data.signer_info.countersigner.serial_number + counter_signer_issuer_dn = ( + signed_data.signer_info.countersigner.signer_info.issuer.dn + ) + + if hasattr(signed_data.signer_info.countersigner, "serial_number"): + counter_signer_sn = ( + signed_data.signer_info.countersigner.serial_number + ) else: - counter_signer_sn = signed_data.signer_info.countersigner.signer_info.serial_number + counter_signer_sn = ( + signed_data.signer_info.countersigner.signer_info.serial_number + ) counter_signer_dict = { - 'issuer_dn': counter_signer_issuer_dn, - 'serial_number': str(counter_signer_sn), - 'signing_time': signed_data.signer_info.countersigner.signing_time.isoformat() + "issuer_dn": counter_signer_issuer_dn, + "serial_number": str(counter_signer_sn), + "signing_time": signed_data.signer_info.countersigner.signing_time.isoformat(), } counter_signer_list.append(counter_signer_dict) @@ -330,9 +359,9 @@ def parse_certificates(data): return "no certificate in signed data" security_dict = { - 'certificates': cert_list, - 'signers': signer_list, - 'counter_signers': counter_signer_list + "certificates": cert_list, + "signers": signer_list, + "counter_signers": counter_signer_list, } try: @@ -341,8 +370,8 @@ def parse_certificates(data): except strelka.ScannerTimeout: raise except Exception as e: - security_dict['verification'] = False - security_dict['verification_error'] = str(e) + security_dict["verification"] = False + security_dict["verification_error"] = str(e) return security_dict @@ -354,158 +383,187 @@ def scan(self, data, file, options, expire_at): try: pe = pefile.PE(data=data) except pefile.PEFormatError: - self.flags.append('pe_format_error') + self.flags.append("pe_format_error") return if rich_dict := parse_rich(pe): if type(rich_dict) != str: - self.event['rich'] = rich_dict + self.event["rich"] = rich_dict else: self.flags.append(rich_dict) if cert_dict := parse_certificates(data): if type(cert_dict) != str: - self.event['security'] = cert_dict + self.event["security"] = cert_dict else: self.flags.append(cert_dict) - self.event['total'] = { - 'libraries': 0, - 'resources': 0, - 'sections': len(pe.sections), - 'symbols': 0, + self.event["total"] = { + "libraries": 0, + "resources": 0, + "sections": len(pe.sections), + "symbols": 0, } - self.event['summary'] = {} + self.event["summary"] = {} - if hasattr(pe, 'DIRECTORY_ENTRY_DEBUG'): + if hasattr(pe, "DIRECTORY_ENTRY_DEBUG"): for d in pe.DIRECTORY_ENTRY_DEBUG: try: data = pe.get_data(d.struct.AddressOfRawData, d.struct.SizeOfData) - if data.find(b'RSDS') != -1 and len(data) > 24: - pdb = 
data[data.find(b'RSDS'):] - self.event['debug'] = { - 'type': 'rsds', - 'guid': b'%s-%s-%s-%s' % ( + if data.find(b"RSDS") != -1 and len(data) > 24: + pdb = data[data.find(b"RSDS") :] + self.event["debug"] = { + "type": "rsds", + "guid": b"%s-%s-%s-%s" + % ( binascii.hexlify(pdb[4:8]), binascii.hexlify(pdb[8:10]), binascii.hexlify(pdb[10:12]), binascii.hexlify(pdb[12:20]), ), - 'age': struct.unpack(' 16: - pdb = data[data.find(b'NB10') + 8:] - self.event['debug'] = { - 'type': 'nb10', - 'created': struct.unpack(' 16: + pdb = data[data.find(b"NB10") + 8 :] + self.event["debug"] = { + "type": "nb10", + "created": struct.unpack("= file_limit: + if self.event["total"]["extracted"] >= file_limit: break try: - extract_data = b'' + extract_data = b"" file_info = rar_obj.getinfo(name) - self.event['host_os'] = HOST_OS_MAPPING[file_info.host_os] + self.event["host_os"] = HOST_OS_MAPPING[ + file_info.host_os + ] if not file_info.needs_password(): extract_data = rar_obj.read(name) else: if i == 0: - self.flags.append('password_protected') - + self.flags.append("password_protected") + if not password and i == 0: for pw in self.passwords: try: - data = rar_obj.open(name, mode='r', psw=pw.decode('utf-8')) + data = rar_obj.open( + name, + mode="r", + psw=pw.decode("utf-8"), + ) if data.readable(): extract_data = data.readall() - password = pw.decode('utf-8') - self.event['password'] = pw.decode('utf-8') + password = pw.decode("utf-8") + self.event["password"] = pw.decode( + "utf-8" + ) break - except (RuntimeError, rarfile.BadRarFile, rarfile.RarCRCError, rarfile.RarWrongPassword): + except ( + RuntimeError, + rarfile.BadRarFile, + rarfile.RarCRCError, + rarfile.RarWrongPassword, + ): pass elif not password and i > 0: break else: try: - data = rar_obj.open(name, mode='r', psw=password) + data = rar_obj.open( + name, mode="r", psw=password + ) if data.readable(): extract_data = data.readall() - except (RuntimeError, rarfile.BadRarFile, rarfile.RarCRCError, rarfile.RarWrongPassword): + except ( + RuntimeError, + rarfile.BadRarFile, + rarfile.RarCRCError, + rarfile.RarWrongPassword, + ): pass - if not extract_data and not 'no_password_match_found' in self.flags: - self.flags.append('no_password_match_found') + if ( + not extract_data + and "no_password_match_found" not in self.flags + ): + self.flags.append("no_password_match_found") if extract_data: # Send extracted file back to Strelka - self.emit_file(extract_data, name=f'{file_info.filename}') + self.emit_file( + extract_data, name=f"{file_info.filename}" + ) - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 except NotImplementedError: - self.flags.append('unsupport_compression') + self.flags.append("unsupport_compression") except RuntimeError: - self.flags.append('runtime_error') + self.flags.append("runtime_error") except ValueError: - self.flags.append('value_error') - + self.flags.append("value_error") + except rarfile.BadRarFile: - self.flags.append('bad_rar') \ No newline at end of file + self.flags.append("bad_rar") diff --git a/src/python/strelka/scanners/scan_rpm.py b/src/python/strelka/scanners/scan_rpm.py index 05ca415c..c6303936 100644 --- a/src/python/strelka/scanners/scan_rpm.py +++ b/src/python/strelka/scanners/scan_rpm.py @@ -12,8 +12,9 @@ class ScanRpm(strelka.Scanner): tmp_directory: Location where tempfile writes temporary files. Defaults to '/tmp/'. 
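For context, a minimal standalone sketch of the pattern the scan method below follows: buffer the bytes to a temp file, let `rpmfile` parse the headers, then slice the payload archive at `data_offset`. The `sample.rpm` path is a stand-in, not part of this change.

```python
import tempfile

import rpmfile

# Stand-in for the `data` bytes a scanner receives
with open("sample.rpm", "rb") as f:
    data = f.read()

with tempfile.NamedTemporaryFile(dir="/tmp/") as tmp_data:
    tmp_data.write(data)
    tmp_data.flush()

    with rpmfile.open(tmp_data.name) as rpm_obj:
        # Header values arrive as bytes; decode only what is needed
        name = rpm_obj.headers.get("name", b"").decode()
        version = rpm_obj.headers.get("version", b"").decode()
        print(f"{name}-{version}")

        # Everything past data_offset is the compressed payload archive,
        # which ScanRpm re-submits to Strelka via emit_file()
        payload = data[rpm_obj.data_offset :]
        print(f"payload: {len(payload)} bytes")
```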
""" + def scan(self, data, file, options, expire_at): - tmp_directory = options.get('tmp_directory', '/tmp/') + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: tmp_data.write(data) @@ -21,59 +22,61 @@ def scan(self, data, file, options, expire_at): try: with rpmfile.open(tmp_data.name) as rpm_obj: - extract_name = '' + extract_name = "" for (key, value) in rpm_obj.headers.items(): - if key == 'arch': - self.event['architecture'] = value - elif key == 'archive_compression': - self.event['archive_compression'] = value - elif key == 'archive_format': - self.event['archive_format'] = value - elif key == 'authors': - self.event['authors'] = value - elif key == 'buildhost': - self.event['build_host'] = value - elif key == 'buildtime': - self.event['build_time'] = value - elif key == 'copyright': - self.event['copyright'] = value - elif key == 'description': + if key == "arch": + self.event["architecture"] = value + elif key == "archive_compression": + self.event["archive_compression"] = value + elif key == "archive_format": + self.event["archive_format"] = value + elif key == "authors": + self.event["authors"] = value + elif key == "buildhost": + self.event["build_host"] = value + elif key == "buildtime": + self.event["build_time"] = value + elif key == "copyright": + self.event["copyright"] = value + elif key == "description": if value is not None: - self.event['description'] = value.replace(b'\n', b' ') - elif key == 'filenames': - self.event['filenames'] = value - elif key == 'group': - self.event['group'] = value - elif key == 'name': - self.event['name'] = value - extract_name = f'{value.decode()}' - elif key == 'os': - self.event['os'] = value - elif key == 'packager': - self.event['packager'] = value - elif key == 'provides': - self.event['provides'] = value - elif key == 'release': - self.event['release'] = value - elif key == 'requirename': - self.event['require_name'] = value - elif key == 'rpmversion': - self.event['rpm_version'] = value - elif key == 'serial': - self.event['serial'] = value - elif key == 'sourcerpm': - self.event['source_rpm'] = value - elif key == 'summary': - self.event['summary'] = value - elif key == 'vendor': - self.event['vendor'] = value - elif key == 'version': - self.event['version'] = value - elif key == 'url': - self.event['url'] = value + self.event["description"] = value.replace(b"\n", b" ") + elif key == "filenames": + self.event["filenames"] = value + elif key == "group": + self.event["group"] = value + elif key == "name": + self.event["name"] = value + extract_name = f"{value.decode()}" + elif key == "os": + self.event["os"] = value + elif key == "packager": + self.event["packager"] = value + elif key == "provides": + self.event["provides"] = value + elif key == "release": + self.event["release"] = value + elif key == "requirename": + self.event["require_name"] = value + elif key == "rpmversion": + self.event["rpm_version"] = value + elif key == "serial": + self.event["serial"] = value + elif key == "sourcerpm": + self.event["source_rpm"] = value + elif key == "summary": + self.event["summary"] = value + elif key == "vendor": + self.event["vendor"] = value + elif key == "version": + self.event["version"] = value + elif key == "url": + self.event["url"] = value # Send extracted file back to Strelka - self.emit_file(data[rpm_obj.data_offset:], name=extract_name) # FIXME: extract_name always empty string + self.emit_file( + data[rpm_obj.data_offset :], name=extract_name + ) # FIXME: 
extract_name always empty string except ValueError: - self.flags.append('value_error') + self.flags.append("value_error") diff --git a/src/python/strelka/scanners/scan_rtf.py b/src/python/strelka/scanners/scan_rtf.py index 7851c029..528196a1 100644 --- a/src/python/strelka/scanners/scan_rtf.py +++ b/src/python/strelka/scanners/scan_rtf.py @@ -10,17 +10,18 @@ class ScanRtf(strelka.Scanner): limit: Maximum number of files to extract. Defaults to 1000. """ + def scan(self, data, file, options, expire_at): - file_limit = options.get('limit', 1000) + file_limit = options.get("limit", 1000) - self.event['total'] = {'rtf_objects': 0, 'extracted': 0} + self.event["total"] = {"rtf_objects": 0, "extracted": 0} rtf = rtfobj.RtfObjParser(data) rtf.parse() - self.event['total']['rtf_objects'] = len(rtf.rtf_objects) + self.event["total"]["rtf_objects"] = len(rtf.rtf_objects) for rtf_object in rtf.rtf_objects: - if self.event['total']['extracted'] >= file_limit: + if self.event["total"]["extracted"] >= file_limit: break index = rtf.server.index(rtf_object) @@ -33,11 +34,11 @@ def scan(self, data, file, options, expire_at): elif rtf_object.is_ole: # Send extracted file back to Strelka - self.emit_file(rtf_object.oledata, name=f'rtf_object_{index}') + self.emit_file(rtf_object.oledata, name=f"rtf_object_{index}") else: # Send extracted file back to Strelka - self.emit_file(rtf_object.rawdata, name=f'rtf_object_{index}') + self.emit_file(rtf_object.rawdata, name=f"rtf_object_{index}") - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 diff --git a/src/python/strelka/scanners/scan_seven_zip.py b/src/python/strelka/scanners/scan_seven_zip.py index 1bc79e91..d90b13fa 100644 --- a/src/python/strelka/scanners/scan_seven_zip.py +++ b/src/python/strelka/scanners/scan_seven_zip.py @@ -1,4 +1,3 @@ -import logging import os import pathlib import re @@ -215,7 +214,7 @@ def parse_7zip_stdout(self, output_7zip, file_limit): regex_mode_properties = re.compile(r"^(--|----)$") # Comment = - regex_property = re.compile(r"^(.+) = (.+)$") + # regex_property = re.compile(r"^(.+) = (.+)$") # Date Time Attr Size Compressed Name regex_mode_files = re.compile( diff --git a/src/python/strelka/scanners/scan_strings.py b/src/python/strelka/scanners/scan_strings.py index 8b33e35b..6c0cfcb8 100644 --- a/src/python/strelka/scanners/scan_strings.py +++ b/src/python/strelka/scanners/scan_strings.py @@ -15,13 +15,14 @@ class ScanStrings(strelka.Scanner): collected. Defaults to 0 (unlimited). 
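The regex compiled in `init()` below matches runs of four or more printable ASCII bytes. A self-contained sketch of the same extraction, with an illustrative input buffer:

```python
import re

# The same pattern ScanStrings compiles in init(): runs of four or more
# bytes outside the control (0x00-0x1F) and high (0x7F-0xFF) ranges
strings_regex = re.compile(rb"[^\x00-\x1F\x7F-\xFF]{4,}")

data = b"\x00\x01MZ\x90\x00hello world\xff\xfepadding\x00"
limit = 2  # the scanner's default of 0 means unlimited

strings = strings_regex.findall(data)
if limit:
    strings = strings[:limit]

print(strings)  # [b'hello world', b'padding']
```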
""" + def init(self): - self.strings_regex = re.compile(br'[^\x00-\x1F\x7F-\xFF]{4,}') + self.strings_regex = re.compile(rb"[^\x00-\x1F\x7F-\xFF]{4,}") def scan(self, data, file, options, expire_at): - limit = options.get('limit', 0) + limit = options.get("limit", 0) strings = self.strings_regex.findall(data) if limit: strings = strings[:limit] - self.event['strings'] = strings + self.event["strings"] = strings diff --git a/src/python/strelka/scanners/scan_swf.py b/src/python/strelka/scanners/scan_swf.py index 4c65e540..9067771d 100644 --- a/src/python/strelka/scanners/scan_swf.py +++ b/src/python/strelka/scanners/scan_swf.py @@ -9,32 +9,33 @@ class ScanSwf(strelka.Scanner): """Decompresses SWF files.""" + def scan(self, data, file, options, expire_at): with io.BytesIO(data) as swf_io: swf_io.seek(4) - swf_size = struct.unpack('= file_limit: + if self.event["total"]["extracted"] >= file_limit: break try: @@ -33,12 +34,14 @@ def scan(self, data, file, options, expire_at): if tar_file is not None: # Send extracted file back to Strelka - self.emit_file(tar_file.read(), name=tar_member.name) + self.emit_file( + tar_file.read(), name=tar_member.name + ) - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 except KeyError: - self.flags.append('key_error') + self.flags.append("key_error") except tarfile.ReadError: - self.flags.append('tarfile_read_error') + self.flags.append("tarfile_read_error") diff --git a/src/python/strelka/scanners/scan_tlsh.py b/src/python/strelka/scanners/scan_tlsh.py index 746b51eb..1724ab0b 100644 --- a/src/python/strelka/scanners/scan_tlsh.py +++ b/src/python/strelka/scanners/scan_tlsh.py @@ -2,8 +2,10 @@ import glob import os + import tlsh import yaml + from strelka import strelka @@ -28,28 +30,30 @@ def init(self): self.tlsh_rules = None def scan(self, data, file, options, expire_at): - location = options.get('location', '/etc/strelka/tlsh/') - score_threshold = options.get('score', 30) + location = options.get("location", "/etc/strelka/tlsh/") + score_threshold = options.get("score", 30) tlsh_file = tlsh.hash(data) - if tlsh_file == 'TNULL': - self.flags.append('null_tlsh') + if tlsh_file == "TNULL": + self.flags.append("null_tlsh") return try: if self.tlsh_rules is None: if os.path.isdir(location): self.tlsh_rules = {} - for filepath in glob.iglob(f'{location}/**/*.yaml', recursive=True): - with open(filepath, 'r') as tlsh_rules: + for filepath in glob.iglob(f"{location}/**/*.yaml", recursive=True): + with open(filepath, "r") as tlsh_rules: try: - self.tlsh_rules.update(yaml.safe_load(tlsh_rules.read())) + self.tlsh_rules.update( + yaml.safe_load(tlsh_rules.read()) + ) except yaml.YAMLError: self.flags.append(f"yaml_error: {filepath}") return elif os.path.isfile(location): - with open(location, 'r') as tlsh_rules: + with open(location, "r") as tlsh_rules: self.tlsh_rules = yaml.safe_load(tlsh_rules.read()) else: self.flags.append("tlsh_location_not_found") @@ -72,4 +76,4 @@ def scan(self, data, file, options, expire_at): this_family = family this_score = score - self.event['match'] = {'family': this_family, 'score': this_score} + self.event["match"] = {"family": this_family, "score": this_score} diff --git a/src/python/strelka/scanners/scan_tnef.py b/src/python/strelka/scanners/scan_tnef.py index 5349d3d2..2735163c 100644 --- a/src/python/strelka/scanners/scan_tnef.py +++ b/src/python/strelka/scanners/scan_tnef.py @@ -5,43 +5,44 @@ class ScanTnef(strelka.Scanner): """Collects metadata and extract files from TNEF files.""" + def scan(self, 
data, file, options, expire_at): - self.event['total'] = {'attachments': 0, 'extracted': 0} - self.event.setdefault('object_names', []) + self.event["total"] = {"attachments": 0, "extracted": 0} + self.event.setdefault("object_names", []) tnef = tnefparse.TNEF(data) - tnef_objects = getattr(tnef, 'objects', []) + tnef_objects = getattr(tnef, "objects", []) for tnef_object in tnef_objects: descriptive_name = tnefparse.TNEF.codes.get(tnef_object.name) - if descriptive_name not in self.event['object_names']: - self.event['object_names'].append(descriptive_name) + if descriptive_name not in self.event["object_names"]: + self.event["object_names"].append(descriptive_name) try: - object_data = tnef_object.data.strip(b'\0') or None + object_data = tnef_object.data.strip(b"\0") or None except strelka.ScannerTimeout: raise except Exception: object_data = tnef_object.data if object_data is not None: - if descriptive_name == 'Subject': - self.event['subject'] = object_data - elif descriptive_name == 'Message ID': - self.event['message_id'] = object_data - elif descriptive_name == 'Message Class': - self.event['message_class'] = object_data - - tnef_attachments = getattr(tnef, 'attachments', []) - self.event['total']['attachments'] = len(tnef_attachments) + if descriptive_name == "Subject": + self.event["subject"] = object_data + elif descriptive_name == "Message ID": + self.event["message_id"] = object_data + elif descriptive_name == "Message Class": + self.event["message_class"] = object_data + + tnef_attachments = getattr(tnef, "attachments", []) + self.event["total"]["attachments"] = len(tnef_attachments) for attachment in tnef_attachments: # Send extracted file back to Strelka self.emit_file(attachment.data, name=attachment.name.decode()) - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 - tnef_html = getattr(tnef, 'htmlbody', None) + tnef_html = getattr(tnef, "htmlbody", None) if tnef_html: # Send extracted file back to Strelka - self.emit_file(tnef_html, name='htmlbody') + self.emit_file(tnef_html, name="htmlbody") diff --git a/src/python/strelka/scanners/scan_upx.py b/src/python/strelka/scanners/scan_upx.py index c626c6b3..638f98cd 100644 --- a/src/python/strelka/scanners/scan_upx.py +++ b/src/python/strelka/scanners/scan_upx.py @@ -12,29 +12,30 @@ class ScanUpx(strelka.Scanner): tmp_directory: Location where tempfile writes temporary files. Defaults to '/tmp/'. 
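A hedged sketch of the unpack flow in the scan method below, assuming the `upx` binary is on PATH; `packed.exe` is a hypothetical sample path:

```python
import os
import subprocess
import tempfile

# Stand-in for a UPX-packed sample read into memory
with open("packed.exe", "rb") as f:
    data = f.read()

with tempfile.NamedTemporaryFile(dir="/tmp/") as tmp_data:
    tmp_data.write(data)
    tmp_data.flush()

    # upx -d decompresses; -o names the output file
    upx_return = subprocess.call(
        ["upx", "-d", tmp_data.name, "-o", f"{tmp_data.name}_upx"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    if upx_return == 0:
        with open(f"{tmp_data.name}_upx", "rb") as upx_fin:
            upx_file = upx_fin.read()
        # The scanner flags upx_packed only when the unpacked output
        # is larger than the input, then emits it back to Strelka
        print(f"unpacked: {len(data)} -> {len(upx_file)} bytes")
        os.remove(f"{tmp_data.name}_upx")
    else:
        # Mirrors the scanner's return_code_<n> flag
        print(f"upx exited with {upx_return}")
```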
""" + def scan(self, data, file, options, expire_at): - tmp_directory = options.get('tmp_directory', '/tmp/') + tmp_directory = options.get("tmp_directory", "/tmp/") with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data: tmp_data.write(data) tmp_data.flush() upx_return = subprocess.call( - ['upx', '-d', tmp_data.name, '-o', f'{tmp_data.name}_upx'], + ["upx", "-d", tmp_data.name, "-o", f"{tmp_data.name}_upx"], stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL + stderr=subprocess.DEVNULL, ) if upx_return == 0: - with open(f'{tmp_data.name}_upx', 'rb') as upx_fin: + with open(f"{tmp_data.name}_upx", "rb") as upx_fin: upx_file = upx_fin.read() upx_size = len(upx_file) if upx_size > len(data): - self.flags.append('upx_packed') + self.flags.append("upx_packed") # Send extracted file back to Strelka self.emit_file(upx_file) - os.remove(f'{tmp_data.name}_upx') + os.remove(f"{tmp_data.name}_upx") else: - self.flags.append(f'return_code_{upx_return}') + self.flags.append(f"return_code_{upx_return}") diff --git a/src/python/strelka/scanners/scan_url.py b/src/python/strelka/scanners/scan_url.py index 99695a45..1e05784a 100644 --- a/src/python/strelka/scanners/scan_url.py +++ b/src/python/strelka/scanners/scan_url.py @@ -22,23 +22,28 @@ class ScanUrl(strelka.Scanner): stored in the regexes dictionary. Defaults to False (uses default regex). """ + def init(self): - self.regexes = {'default': re.compile(br'(?:\b[a-z\d.-]+://[^<>\s\(\)]+|\b(?:(?:(?:[^\s!@#$%^&*()_=+[\]{}\|;:\'",.<>/?]+)\.)+(?:aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|d
ay|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsu
ka|ott|ovh|pa|page|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn--11b4c3d|xn--1ck2e1b|xn--1qqw23a|xn--2scrj9c|xn--30rr7y|xn--3bst00m|xn--3ds443g|xn--3e0b707e|xn--3hcrj9c|xn--3oq18vl8pn36a|xn--3pxu8k|xn--42c2d9a|xn--45br5cyl|xn--45brj9c|xn--45q11c|xn--4gbrim|xn--54b7fta0cc|xn--55qw42g|xn--55qx5d|xn--5su34j936bgsg|xn--5tzm5g|xn--6frz82g|xn--6qq986b3xl|xn--80adxhks|xn--80ao21a|xn--80aqecdr1a|xn--80asehdb|xn--80aswg|xn--8y0a063a|xn--90a3ac|xn--90ae|xn--90ais|xn--9dbq2a|xn--9et52u|xn--9krt00a|xn--b4w605ferd|xn--bck1b9a5dre4c|xn--c1avg|xn--c2br7g|xn--cck2b3b|xn--cg4bki|xn--clchc0ea0b2g2a9gcd|xn--czr694b|xn--czrs0t|xn--czru2d|xn--d1acj3b|xn--d1alf|xn--e1a4c|xn--eckvdtc9d|xn--efvy88h|xn--estv75g|xn--fct429k|xn--fhbei|xn--fiq228c5hs|xn--fiq64b|xn--fiqs8s|xn--fiqz9s|xn--fjq720a|xn--flw351e|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--fzys8d69uvgm|xn--g2xx48c|xn--gckr3f0f|xn--gecrj9c|xn--gk3at1e|xn--h2breg3eve|xn--h2brj9c|xn--h2brj9c8c|xn--hxt814e|xn--i1b6b1a6a2e|xn--imr513n|xn--io0a7i
|xn--j1aef|xn--j1amh|xn--j6w193g|xn--jlq61u9w7b|xn--jvr189m|xn--kcrx77d1x4a|xn--kprw13d|xn--kpry57d|xn--kpu716f|xn--kput3i|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|xn--mgba3a3ejt|xn--mgba3a4f16a|xn--mgba7c0bbn0a|xn--mgbaakc7dvf|xn--mgbaam7a8h|xn--mgbab2bd|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbb9fbpob|xn--mgbbh1a|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgbca7dzdo|xn--mgberp4a5d4ar|xn--mgbgu82a|xn--mgbi4ecexp|xn--mgbpl2fh|xn--mgbt3dhd|xn--mgbtx2b|xn--mgbx4cd0ab|xn--mix891f|xn--mk1bu44c|xn--mxtq1m|xn--ngbc5azd|xn--ngbe9e0a|xn--ngbrx|xn--node|xn--nqv7f|xn--nqv7fs00ema|xn--nyqy26a|xn--o3cw4h|xn--ogbpf8fl|xn--otu796d|xn--p1acf|xn--p1ai|xn--pbt977c|xn--pgbs0dh|xn--pssy2u|xn--q9jyb4c|xn--qcka1pmc|xn--qxam|xn--rhqv96g|xn--rovu88b|xn--rvc1e0am3e|xn--s9brj9c|xn--ses554g|xn--t60b56a|xn--tckwe|xn--tiq49xqyj|xn--unup4y|xn--vermgensberater-ctb|xn--vermgensberatung-pwb|xn--vhquv|xn--vuq861b|xn--w4r85el8fhu5dnra|xn--w4rs40l|xn--wgbh1c|xn--wgbl6a|xn--xhq521b|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--y9a3aq|xn--yfro4i67o|xn--ygbi2ammx|xn--zfr164b|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)|(?:(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]))(?:[;/][^#?<>\s]*)?(?:\?[^#<>\s]*)?(?:#[^<>\s\(\)]*)?(?!\w))')} + self.regexes = { + "default": re.compile( + rb'(?:\b[a-z\d.-]+://[^<>\s\(\)]+|\b(?:(?:(?:[^\s!@#$%^&*()_=+[\]{}\|;:\'",.<>/?]+)\.)+(?:aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|del
oitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|panerai|paris|pars|partners|
parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn--11b4c3d|xn--1ck2e1b|xn--1qqw23a|xn--2scrj9c|xn--30rr7y|xn--3bst00m|xn--3ds443g|xn--3e0b707e|xn--3hcrj9c|xn--3oq18vl8pn36a|xn--3pxu8k|xn--42c2d9a|xn--45br5cyl|xn--45brj9c|xn--45q11c|xn--4gbrim|xn--54b7fta0cc|xn--55qw42g|xn--55qx5d|xn--5su34j936bgsg|xn--5tzm5g|xn--6frz82g|xn--6qq986b3xl|xn--80adxhks|xn--80ao21a|xn--80aqecdr1a|xn--80asehdb|xn--80aswg|xn--8y0a063a|xn--90a3ac|xn--90ae|xn--90ais|xn--9dbq2a|xn--9et52u|xn--9krt00a|xn--b4w605ferd|xn--bck1b9a5dre4c|xn--c1avg|xn--c2br7g|xn--cck2b3b|xn--cg4bki|xn--clchc0ea0b2g2a9gcd|xn--czr694b|xn--czrs0t|xn--czru2d|xn--d1acj3b|xn--d1alf|xn--e1a4c|xn--eckvdtc9d|xn--efvy88h|xn--estv75g|xn--fct429k|xn--fhbei|xn--fiq228c5hs|xn--fiq64b|xn--fiqs8s|xn--fiqz9s|xn--fjq720a|xn--flw351e|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--fzys8d69uvgm|xn--g2xx48c|xn--gckr3f0f|xn--gecrj9c|xn--gk3at1e|xn--h2breg3eve|xn--h2brj9c|xn--h2brj9c8c|xn--hxt814e|xn--i1b6b1a6a2e|xn--imr513n|xn--io0a7i|xn--j1aef|xn--j1amh|xn--j6w193g|xn--jlq61u9w7b|xn--jvr18
9m|xn--kcrx77d1x4a|xn--kprw13d|xn--kpry57d|xn--kpu716f|xn--kput3i|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|xn--mgba3a3ejt|xn--mgba3a4f16a|xn--mgba7c0bbn0a|xn--mgbaakc7dvf|xn--mgbaam7a8h|xn--mgbab2bd|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbb9fbpob|xn--mgbbh1a|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgbca7dzdo|xn--mgberp4a5d4ar|xn--mgbgu82a|xn--mgbi4ecexp|xn--mgbpl2fh|xn--mgbt3dhd|xn--mgbtx2b|xn--mgbx4cd0ab|xn--mix891f|xn--mk1bu44c|xn--mxtq1m|xn--ngbc5azd|xn--ngbe9e0a|xn--ngbrx|xn--node|xn--nqv7f|xn--nqv7fs00ema|xn--nyqy26a|xn--o3cw4h|xn--ogbpf8fl|xn--otu796d|xn--p1acf|xn--p1ai|xn--pbt977c|xn--pgbs0dh|xn--pssy2u|xn--q9jyb4c|xn--qcka1pmc|xn--qxam|xn--rhqv96g|xn--rovu88b|xn--rvc1e0am3e|xn--s9brj9c|xn--ses554g|xn--t60b56a|xn--tckwe|xn--tiq49xqyj|xn--unup4y|xn--vermgensberater-ctb|xn--vermgensberatung-pwb|xn--vhquv|xn--vuq861b|xn--w4r85el8fhu5dnra|xn--w4rs40l|xn--wgbh1c|xn--wgbl6a|xn--xhq521b|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--y9a3aq|xn--yfro4i67o|xn--ygbi2ammx|xn--zfr164b|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)|(?:(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]))(?:[;/][^#?<>\s]*)?(?:\?[^#<>\s]*)?(?:#[^<>\s\(\)]*)?(?!\w))' + ) + } def scan(self, data, file, options, expire_at): - regex = options.get('regex', False) + regex = options.get("regex", False) if regex: - (key, value), = regex.items() + ((key, value),) = regex.items() if key not in self.regexes: self.regexes[key] = re.compile(value.encode()) url_regex = self.regexes[key] else: - url_regex = self.regexes['default'] + url_regex = self.regexes["default"] - normalized_data = b' '.join(data.split()) - self.event.setdefault('urls', []) + normalized_data = b" ".join(data.split()) + self.event.setdefault("urls", []) urls = url_regex.findall(normalized_data) for url in urls: - url = url.strip(b'!"#$%&\'()*+,-./@:;<=>[\\]^_`{|}~') - if url not in self.event['urls']: - self.event['urls'].append(url) + url = url.strip(b"!\"#$%&'()*+,-./@:;<=>[\\]^_`{|}~") + if url not in self.event["urls"]: + self.event["urls"].append(url) diff --git a/src/python/strelka/scanners/scan_vb.py b/src/python/strelka/scanners/scan_vb.py index 20eac318..dfcd654b 100644 --- a/src/python/strelka/scanners/scan_vb.py +++ b/src/python/strelka/scanners/scan_vb.py @@ -1,6 +1,5 @@ import pygments -from pygments import formatters -from pygments import lexers +from pygments import formatters, lexers from strelka import strelka @@ -11,8 +10,9 @@ class ScanVb(strelka.Scanner): Attributes: lexer: Pygments lexer ('vbnet') used to parse the file. 
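To see the input that the token-bucketing loop below consumes, a small sketch of `RawTokenFormatter` output — it emits one `Token.Kind<TAB>repr(value)` pair per line, as bytes. The VB snippet is illustrative:

```python
import pygments
from pygments import formatters, lexers

lexer = lexers.get_lexer_by_name("vbnet")
source = 'Sub Foo()\n    MsgBox "hi"\nEnd Sub\n'

# RawTokenFormatter returns bytes: one token/value pair per line
highlight = pygments.highlight(source, lexer, formatters.RawTokenFormatter())

for hl in highlight.split(b"\n"):
    split_highlight = hl.split(b"\t")
    if len(split_highlight) == 2:
        token = split_highlight[0].decode()
        value = split_highlight[1].decode().strip("'\"").strip()
        if value:
            # e.g. Token.Keyword Sub, Token.Literal.String hi, ...
            print(token, value)
```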
""" + def init(self): - self.lexer = lexers.get_lexer_by_name('vbnet') + self.lexer = lexers.get_lexer_by_name("vbnet") def scan(self, data, file, options, expire_at): highlight = pygments.highlight( @@ -20,43 +20,43 @@ def scan(self, data, file, options, expire_at): self.lexer, formatters.RawTokenFormatter(), ) - highlight_list = highlight.split(b'\n') + highlight_list = highlight.split(b"\n") ordered_highlights = [] for hl in highlight_list: - split_highlight = hl.split(b'\t') + split_highlight = hl.split(b"\t") if len(split_highlight) == 2: token = split_highlight[0].decode() - value = split_highlight[1].decode().strip('\'"').strip() - highlight_entry = {'token': token, 'value': value} - if highlight_entry['value']: + value = split_highlight[1].decode().strip("'\"").strip() + highlight_entry = {"token": token, "value": value} + if highlight_entry["value"]: ordered_highlights.append(highlight_entry) - self.event.setdefault('tokens', []) - self.event.setdefault('comments', []) - self.event.setdefault('functions', []) - self.event.setdefault('names', []) - self.event.setdefault('operators', []) - self.event.setdefault('strings', []) + self.event.setdefault("tokens", []) + self.event.setdefault("comments", []) + self.event.setdefault("functions", []) + self.event.setdefault("names", []) + self.event.setdefault("operators", []) + self.event.setdefault("strings", []) position = 0 while position < len(ordered_highlights): ohlp = ordered_highlights[position] - if ohlp['token'] not in self.event['tokens']: - self.event['tokens'].append(ohlp['token']) - if ohlp['token'] == 'Token.Comment': - if ohlp['value'] not in self.event['comments']: - self.event['comments'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Name.Function': - if ohlp['value'] not in self.event['functions']: - self.event['functions'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Name': - if ohlp['value'] not in self.event['names']: - self.event['names'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Operator': - if ohlp['value'] not in self.event['operators']: - self.event['operators'].append(ohlp['value']) - elif ohlp['token'] == 'Token.Literal.String': - if ohlp['value'] not in self.event['strings']: - self.event['strings'].append(ohlp['value']) + if ohlp["token"] not in self.event["tokens"]: + self.event["tokens"].append(ohlp["token"]) + if ohlp["token"] == "Token.Comment": + if ohlp["value"] not in self.event["comments"]: + self.event["comments"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Name.Function": + if ohlp["value"] not in self.event["functions"]: + self.event["functions"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Name": + if ohlp["value"] not in self.event["names"]: + self.event["names"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Operator": + if ohlp["value"] not in self.event["operators"]: + self.event["operators"].append(ohlp["value"]) + elif ohlp["token"] == "Token.Literal.String": + if ohlp["value"] not in self.event["strings"]: + self.event["strings"].append(ohlp["value"]) position += 1 diff --git a/src/python/strelka/scanners/scan_vba.py b/src/python/strelka/scanners/scan_vba.py index 7585e9ca..7bad6050 100644 --- a/src/python/strelka/scanners/scan_vba.py +++ b/src/python/strelka/scanners/scan_vba.py @@ -10,47 +10,48 @@ class ScanVba(strelka.Scanner): analyze_macros: Boolean that determines if macros should be analyzed. Defaults to True. 
""" + def scan(self, data, file, options, expire_at): - analyze_macros = options.get('analyze_macros', True) + analyze_macros = options.get("analyze_macros", True) - self.event['total'] = {'files': 0, 'extracted': 0} + self.event["total"] = {"files": 0, "extracted": 0} try: vba = olevba.VBA_Parser(filename=file.name, data=data) if vba.detect_vba_macros(): extract_macros = list(vba.extract_macros()) - self.event['total']['files'] = len(extract_macros) + self.event["total"]["files"] = len(extract_macros) for (filename, stream_path, vba_filename, vba_code) in extract_macros: # Send extracted file back to Strelka - self.emit_file(vba_code, name=f'{vba_filename}') + self.emit_file(vba_code, name=f"{vba_filename}") - self.event['total']['extracted'] += 1 + self.event["total"]["extracted"] += 1 if analyze_macros: - self.event.setdefault('auto_exec', []) - self.event.setdefault('base64', []) - self.event.setdefault('dridex', []) - self.event.setdefault('hex', []) - self.event.setdefault('ioc', []) - self.event.setdefault('suspicious', []) + self.event.setdefault("auto_exec", []) + self.event.setdefault("base64", []) + self.event.setdefault("dridex", []) + self.event.setdefault("hex", []) + self.event.setdefault("ioc", []) + self.event.setdefault("suspicious", []) macros = vba.analyze_macros() for (macro_type, keyword, description) in macros: - if macro_type == 'AutoExec': - self.event['auto_exec'].append(keyword) - elif macro_type == 'Base64 String': - self.event['base64'].append(keyword) - elif macro_type == 'Dridex String': - self.event['dridex'].append(keyword) - elif macro_type == 'Hex String': - self.event['hex'].append(keyword) - elif macro_type == 'IOC': - self.event['ioc'].append(keyword) - elif macro_type == 'Suspicious': - self.event['suspicious'].append(keyword) + if macro_type == "AutoExec": + self.event["auto_exec"].append(keyword) + elif macro_type == "Base64 String": + self.event["base64"].append(keyword) + elif macro_type == "Dridex String": + self.event["dridex"].append(keyword) + elif macro_type == "Hex String": + self.event["hex"].append(keyword) + elif macro_type == "IOC": + self.event["ioc"].append(keyword) + elif macro_type == "Suspicious": + self.event["suspicious"].append(keyword) except olevba.FileOpenError: - self.flags.append('file_open_error') + self.flags.append("file_open_error") finally: # TODO referenced before potential assignment as vba is opened in a try / catch block vba.close() diff --git a/src/python/strelka/scanners/scan_vhd.py b/src/python/strelka/scanners/scan_vhd.py index 2f040266..aaab271e 100644 --- a/src/python/strelka/scanners/scan_vhd.py +++ b/src/python/strelka/scanners/scan_vhd.py @@ -256,4 +256,6 @@ def upload(self, name, expire_at): with open(name, "rb") as extracted_file: # Send extracted file back to Strelka - self.emit_file(extracted_file.read(), name=os.path.basename(extracted_file.name)) + self.emit_file( + extracted_file.read(), name=os.path.basename(extracted_file.name) + ) diff --git a/src/python/strelka/scanners/scan_x509.py b/src/python/strelka/scanners/scan_x509.py index 5eb86c76..9a24df10 100644 --- a/src/python/strelka/scanners/scan_x509.py +++ b/src/python/strelka/scanners/scan_x509.py @@ -15,22 +15,27 @@ class ScanX509(strelka.Scanner): scanned. Must be either 'der' or 'pem'. Defaults to empty string. 
""" + def scan(self, data, file, options, expire_at): - file_type = options.get('type', '') + file_type = options.get("type", "") - if file_type == 'der': + if file_type == "der": cert = X509.load_cert_der_string(data) else: cert = X509.load_cert_string(data) - self.event['issuer'] = cert.get_issuer().as_text() - self.event['subject'] = cert.get_subject().as_text() - self.event['serial_number'] = str(cert.get_serial_number()) - self.event['fingerprint'] = cert.get_fingerprint() - self.event['version'] = cert.get_version() - self.event['not_after'] = int(cert.get_not_after().get_datetime().strftime('%s')) - self.event['not_before'] = int(cert.get_not_before().get_datetime().strftime('%s')) - if self.event['not_after'] < time.time(): - self.event['expired'] = True + self.event["issuer"] = cert.get_issuer().as_text() + self.event["subject"] = cert.get_subject().as_text() + self.event["serial_number"] = str(cert.get_serial_number()) + self.event["fingerprint"] = cert.get_fingerprint() + self.event["version"] = cert.get_version() + self.event["not_after"] = int( + cert.get_not_after().get_datetime().strftime("%s") + ) + self.event["not_before"] = int( + cert.get_not_before().get_datetime().strftime("%s") + ) + if self.event["not_after"] < time.time(): + self.event["expired"] = True else: - self.event['expired'] = False + self.event["expired"] = False diff --git a/src/python/strelka/scanners/scan_xl4ma.py b/src/python/strelka/scanners/scan_xl4ma.py index b277bd01..e7a27f9a 100644 --- a/src/python/strelka/scanners/scan_xl4ma.py +++ b/src/python/strelka/scanners/scan_xl4ma.py @@ -1,5 +1,6 @@ # Authors: Ryan Borre from strelka.auxiliary.xl4ma import analyzer + from strelka import strelka diff --git a/src/python/strelka/scanners/scan_xml.py b/src/python/strelka/scanners/scan_xml.py index b17387b9..17c88abc 100644 --- a/src/python/strelka/scanners/scan_xml.py +++ b/src/python/strelka/scanners/scan_xml.py @@ -14,31 +14,32 @@ class ScanXml(strelka.Scanner): as metadata. Defaults to empty list. 
""" + def scan(self, data, file, options, expire_at): xml_args = { - 'extract_tags': options.get('extract_tags', []), - 'metadata_tags': options.get('metadata_tags', []), + "extract_tags": options.get("extract_tags", []), + "metadata_tags": options.get("metadata_tags", []), } self.expire_at = expire_at - self.event.setdefault('tags', []) - self.event.setdefault('tag_data', []) - self.event.setdefault('namespaces', []) - self.event['total'] = {'tags': 0, 'extracted': 0} + self.event.setdefault("tags", []) + self.event.setdefault("tag_data", []) + self.event.setdefault("namespaces", []) + self.event["total"] = {"tags": 0, "extracted": 0} xml = None try: xml_buffer = data - if xml_buffer.startswith(b' None: + def __init__( + self, + pointer: str = "", + parent: str = "", + depth: int = 0, + name: str = "", + source: str = "", + data: Optional[bytes] = None, + ) -> None: """Inits file object.""" self.data: Optional[bytes] = data self.depth: int = depth @@ -89,13 +98,13 @@ def __init__(self, pointer: str = '', def dictionary(self) -> dict: return { - 'depth': self.depth, - 'flavors': self.flavors, - 'name': self.name, - 'scanners': self.scanners, - 'size': self.size, - 'source': self.source, - 'tree': self.tree + "depth": self.depth, + "flavors": self.flavors, + "name": self.name, + "scanners": self.scanners, + "size": self.size, + "source": self.source, + "tree": self.tree, } def add_flavors(self, flavors: dict) -> None: @@ -117,27 +126,31 @@ def fn(signal_number: int, frame: Optional[FrameType]): class Backend(object): - def __init__(self, backend_cfg: dict, coordinator: Optional[redis.StrictRedis] = None) -> None: + def __init__( + self, backend_cfg: dict, coordinator: Optional[redis.StrictRedis] = None + ) -> None: self.scanner_cache: dict = {} self.backend_cfg: dict = backend_cfg self.coordinator: Optional[redis.StrictRedis] = coordinator - self.limits: dict = backend_cfg.get('limits', {}) - self.scanners: dict = backend_cfg.get('scanners', {}) + self.limits: dict = backend_cfg.get("limits", {}) + self.scanners: dict = backend_cfg.get("scanners", {}) self.compiled_magic = magic.Magic( - magic_file=backend_cfg.get('tasting', {}).get('mime_db', ''), + magic_file=backend_cfg.get("tasting", {}).get("mime_db", ""), mime=True, ) - yara_rules = backend_cfg.get('tasting', {}).get('yara_rules', '/etc/strelka/taste/') + yara_rules = backend_cfg.get("tasting", {}).get( + "yara_rules", "/etc/strelka/taste/" + ) if os.path.isdir(yara_rules): yara_filepaths = {} globbed_yara = glob.iglob( - f'{yara_rules}/**/*.yar*', + f"{yara_rules}/**/*.yar*", recursive=True, ) for (i, entry) in enumerate(globbed_yara): - yara_filepaths[f'namespace{i}'] = entry + yara_filepaths[f"namespace{i}"] = entry self.compiled_yara = yara.compile(filepaths=yara_filepaths) else: self.compiled_yara = yara.compile(filepath=yara_rules) @@ -154,12 +167,12 @@ def taste_yara(self, data: bytes) -> list: return [match.rule for match in yara_matches] def match_flavors(self, data: bytes) -> dict: - return {'mime': self.taste_mime(data), 'yara': self.taste_yara(data)} + return {"mime": self.taste_mime(data), "yara": self.taste_yara(data)} def work(self) -> None: """Process tasks from Redis coordinator""" - logging.info('starting up') + logging.info("starting up") if not self.coordinator: logging.error("no coordinator specified") @@ -167,18 +180,18 @@ def work(self) -> None: count = 0 work_start = time.time() - work_expire = work_start + self.limits.get('time_to_live', 900) + work_expire = work_start + self.limits.get("time_to_live", 900) 
while True: - if self.limits.get('max_files') != 0: - if count >= self.limits.get('max_files', 5000): + if self.limits.get("max_files") != 0: + if count >= self.limits.get("max_files", 5000): break - if self.limits.get('time_to_live') != 0: + if self.limits.get("time_to_live") != 0: if time.time() >= work_expire: break # Retrieve request task from Redis coordinator - task = self.coordinator.zpopmin('tasks', count=1) + task = self.coordinator.zpopmin("tasks", count=1) if len(task) == 0: time.sleep(0.25) continue @@ -196,9 +209,13 @@ def work(self) -> None: else: root_id = task_info["id"] try: - file = File(pointer=root_id, name=task_info["attributes"]["filename"]) + file = File( + pointer=root_id, name=task_info["attributes"]["filename"] + ) except KeyError as ex: - logging.debug(f"No filename attached (error: {ex}) to request: {task_item}") + logging.debug( + f"No filename attached (error: {ex}) to request: {task_item}" + ) file = File(pointer=root_id) expire_at = math.ceil(expire_at) @@ -218,23 +235,25 @@ def work(self) -> None: # Push completed event back to Redis to complete request p = self.coordinator.pipeline(transaction=False) - p.rpush(f'event:{root_id}', 'FIN') - p.expireat(f'event:{root_id}', expire_at) + p.rpush(f"event:{root_id}", "FIN") + p.expireat(f"event:{root_id}", expire_at) p.execute() # Reset timeout handler signal.alarm(0) except RequestTimeout: - logging.debug(f'request {root_id} timed out') + logging.debug(f"request {root_id} timed out") except Exception: signal.alarm(0) - logging.exception('unknown exception (see traceback below)') + logging.exception("unknown exception (see traceback below)") count += 1 - logging.info(f'shutdown after scanning {count} file(s) and' - f' {time.time() - work_start} second(s)') + logging.info( + f"shutdown after scanning {count} file(s) and" + f" {time.time() - work_start} second(s)" + ) def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: """Distributes a file through scanners. 
@@ -248,7 +267,7 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: """ try: - data = b'' + data = b"" files = [] events = [] @@ -257,10 +276,10 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: try: # Prepare timeout handler signal.signal(signal.SIGALRM, timeout_handler(DistributionTimeout)) - signal.alarm(self.limits.get('distribution', 600)) + signal.alarm(self.limits.get("distribution", 600)) - if file.depth > self.limits.get('max_depth', 15): - logging.info(f'request {root_id} exceeded maximum depth') + if file.depth > self.limits.get("max_depth", 15): + logging.info(f"request {root_id} exceeded maximum depth") return [] # Distribute can work local-only (data in File) or through a coordinator @@ -270,7 +289,7 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: elif self.coordinator: # Pull data for file from coordinator while True: - pop = self.coordinator.lpop(f'data:{file.pointer}') + pop = self.coordinator.lpop(f"data:{file.pointer}") if pop is None: break data += pop @@ -287,19 +306,19 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: scanner_list = self.match_scanners(file) tree_dict = { - 'node': file.uid, - 'parent': file.parent, - 'root': root_id, + "node": file.uid, + "parent": file.parent, + "root": root_id, } # Since root_id comes from the request, use that instead of the file's uid if file.depth == 0: - tree_dict['node'] = root_id + tree_dict["node"] = root_id if file.depth == 1: - tree_dict['parent'] = root_id + tree_dict["parent"] = root_id # Update the file object - file.scanners = [s.get('name') for s in scanner_list] + file.scanners = [s.get("name") for s in scanner_list] file.size = len(data) file.tree = tree_dict @@ -307,15 +326,19 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: for scanner in scanner_list: try: - name = scanner['name'] + name = scanner["name"] und_name = inflection.underscore(name) - scanner_import = f'strelka.scanners.{und_name}' + scanner_import = f"strelka.scanners.{und_name}" module = importlib.import_module(scanner_import) - if self.backend_cfg.get("caching", {"scanner": True}).get("scanner", True): + if self.backend_cfg.get("caching", {"scanner": True}).get( + "scanner", True + ): # Cache a copy of each scanner object if und_name not in self.scanner_cache: - attr = getattr(module, name)(self.backend_cfg, self.coordinator) + attr = getattr(module, name)( + self.backend_cfg, self.coordinator + ) self.scanner_cache[und_name] = attr plugin = self.scanner_cache[und_name] @@ -323,9 +346,11 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: plugin.files = [] plugin.flags = [] else: - plugin = getattr(module, name)(self.backend_cfg, self.coordinator) + plugin = getattr(module, name)( + self.backend_cfg, self.coordinator + ) - options = scanner.get('options', {}) + options = scanner.get("options", {}) # Run the scanner (scanner_files, scanner_event) = plugin.scan_wrapper( @@ -344,11 +369,13 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: } except ModuleNotFoundError: - logging.exception(f'scanner {scanner.get("name", "__missing__")} not found') + logging.exception( + f'scanner {scanner.get("name", "__missing__")} not found' + ) event = { - **{'file': file.dictionary()}, - **{'scan': scan}, + **{"file": file.dictionary()}, + **{"scan": scan}, } # Collect events for local-only @@ -356,15 +383,15 @@ def distribute(self, root_id: str, file: 
File, expire_at: int) -> list[dict]: # Send event back to Redis coordinator if pipeline: - pipeline.rpush(f'event:{root_id}', format_event(event)) - pipeline.expireat(f'event:{root_id}', expire_at) + pipeline.rpush(f"event:{root_id}", format_event(event)) + pipeline.expireat(f"event:{root_id}", expire_at) pipeline.execute() signal.alarm(0) except DistributionTimeout: # FIXME: node id is not always file.uid - logging.exception(f'node {file.uid} timed out') + logging.exception(f"node {file.uid} timed out") # Re-ingest extracted files for scanner_file in files: @@ -378,7 +405,13 @@ def distribute(self, root_id: str, file: File, expire_at: int) -> list[dict]: return events - def match_scanner(self, scanner: str, mappings: list, file: File, ignore_wildcards: Optional[bool] = False) -> dict: + def match_scanner( + self, + scanner: str, + mappings: list, + file: File, + ignore_wildcards: Optional[bool] = False, + ) -> dict: """Matches a scanner to mappings and file data. Performs the task of assigning a scanner based on the scan configuration @@ -397,17 +430,19 @@ def match_scanner(self, scanner: str, mappings: list, file: File, ignore_wildcar Dictionary containing the assigned scanner or None. """ for mapping in mappings: - negatives = mapping.get('negative', {}) - positives = mapping.get('positive', {}) - neg_flavors = negatives.get('flavors', []) - neg_filename = negatives.get('filename', None) - neg_source = negatives.get('source', None) - pos_flavors = positives.get('flavors', []) - pos_filename = positives.get('filename', None) - pos_source = positives.get('source', None) - assigned = {'name': scanner, - 'priority': mapping.get('priority', 5), - 'options': mapping.get('options', {})} + negatives = mapping.get("negative", {}) + positives = mapping.get("positive", {}) + neg_flavors = negatives.get("flavors", []) + neg_filename = negatives.get("filename", None) + neg_source = negatives.get("source", None) + pos_flavors = positives.get("flavors", []) + pos_filename = positives.get("filename", None) + pos_source = positives.get("source", None) + assigned = { + "name": scanner, + "priority": mapping.get("priority", 5), + "options": mapping.get("options", {}), + } for neg_flavor in neg_flavors: if neg_flavor in itertools.chain(*file.flavors.values()): @@ -419,7 +454,9 @@ def match_scanner(self, scanner: str, mappings: list, file: File, ignore_wildcar if re.search(neg_source, file.source) is not None: return {} for pos_flavor in pos_flavors: - if (pos_flavor == '*' and not ignore_wildcards) or pos_flavor in itertools.chain(*file.flavors.values()): + if ( + pos_flavor == "*" and not ignore_wildcards + ) or pos_flavor in itertools.chain(*file.flavors.values()): return assigned if pos_filename is not None: if re.search(pos_filename, file.name) is not None: @@ -430,7 +467,9 @@ def match_scanner(self, scanner: str, mappings: list, file: File, ignore_wildcar return {} - def match_scanners(self, file: File, ignore_wildcards: Optional[bool] = False) -> list: + def match_scanners( + self, file: File, ignore_wildcards: Optional[bool] = False + ) -> list: """ Wraps match_scanner @@ -449,7 +488,7 @@ def match_scanners(self, file: File, ignore_wildcards: Optional[bool] = False) - scanner_list.append(scanner) scanner_list.sort( - key=lambda k: k.get('priority', 5), + key=lambda k: k.get("priority", 5), reverse=True, ) @@ -462,13 +501,13 @@ class IocOptions(object): string. 
""" - domain = 'domain' - url = 'url' - md5 = 'md5' - sha1 = 'sha1' - sha256 = 'sha256' - email = 'email' - ip = 'ip' + domain = "domain" + url = "url" + md5 = "md5" + sha1 = "sha1" + sha256 = "sha256" + email = "email" + ip = "ip" class Scanner(object): @@ -490,11 +529,13 @@ class Scanner(object): coordinator: Redis client connection to the coordinator. """ - def __init__(self, backend_cfg: dict, coordinator: Optional[redis.StrictRedis] = None) -> None: + def __init__( + self, backend_cfg: dict, coordinator: Optional[redis.StrictRedis] = None + ) -> None: """Inits scanner with scanner name and metadata key.""" self.name = self.__class__.__name__ - self.key = inflection.underscore(self.name.replace('Scan', '')) - self.scanner_timeout = backend_cfg.get('limits', {}).get('scanner', 10) + self.key = inflection.underscore(self.name.replace("Scan", "")) + self.scanner_timeout = backend_cfg.get("limits", {}).get("scanner", 10) self.coordinator = coordinator self.event: dict = dict() self.files: list = [] @@ -515,11 +556,7 @@ def timeout_handler(self, signal_number: int, frame: Optional[FrameType]) -> Non """Signal ScannerTimeout""" raise ScannerTimeout - def scan(self, - data, - file, - options, - expire_at) -> None: + def scan(self, data, file, options, expire_at) -> None: """Overrideable scan method. Args: @@ -530,11 +567,9 @@ def scan(self, """ pass - def scan_wrapper(self, - data: bytes, - file: File, - options: dict, - expire_at: int) -> Tuple[list[File], dict]: + def scan_wrapper( + self, data: bytes, file: File, options: dict, expire_at: int + ) -> Tuple[list[File], dict]: """Sets up scan attributes and calls scan method. Scanning code is wrapped in try/except for error handling. @@ -557,8 +592,9 @@ def scan_wrapper(self, """ start = time.time() self.event = dict() - self.scanner_timeout = options.get('scanner_timeout', - self.scanner_timeout or 10) + self.scanner_timeout = options.get( + "scanner_timeout", self.scanner_timeout or 10 + ) try: signal.signal(signal.SIGALRM, self.timeout_handler) @@ -567,7 +603,7 @@ def scan_wrapper(self, self.scan(data, file, options, expire_at) signal.alarm(0) except ScannerTimeout: - self.flags.append('timed_out') + self.flags.append("timed_out") except (DistributionTimeout, RequestTimeout): raise except ScannerException as e: @@ -575,29 +611,32 @@ def scan_wrapper(self, self.event.update({"exception": e.message}) except Exception as e: signal.alarm(0) - logging.exception(f'{self.name}: unhandled exception while scanning' - f' uid {file.uid if file else "_missing_"} (see traceback below)') - self.flags.append('uncaught_exception') - self.event.update({"exception": "\n".join(traceback.format_exception(e, limit=-10))}) + logging.exception( + f"{self.name}: unhandled exception while scanning" + f' uid {file.uid if file else "_missing_"} (see traceback below)' + ) + self.flags.append("uncaught_exception") + self.event.update( + {"exception": "\n".join(traceback.format_exception(e, limit=-10))} + ) self.event = { - **{'elapsed': round(time.time() - start, 6)}, - **{'flags': self.flags}, - **self.event + **{"elapsed": round(time.time() - start, 6)}, + **{"flags": self.flags}, + **self.event, } - return ( - self.files, - {self.key: self.event} - ) + return (self.files, {self.key: self.event}) - def emit_file(self, data: bytes, name: str = "", flavors: Optional[list[str]] = None) -> None: + def emit_file( + self, data: bytes, name: str = "", flavors: Optional[list[str]] = None + ) -> None: """Re-ingest extracted file""" extract_file = File( name=name, 
source=self.name, ) if flavors: - extract_file.add_flavors({'external': flavors}) + extract_file.add_flavors({"external": flavors}) if self.coordinator: for c in chunk_string(data): @@ -625,43 +664,67 @@ def upload_to_coordinator(self, pointer, chunk, expire_at) -> None: """ if self.coordinator: p = self.coordinator.pipeline(transaction=False) - p.rpush(f'data:{pointer}', chunk) - p.expireat(f'data:{pointer}', expire_at) + p.rpush(f"data:{pointer}", chunk) + p.expireat(f"data:{pointer}", expire_at) p.execute() - def process_ioc(self, ioc, ioc_type, scanner_name, description='', malicious=False) -> None: + def process_ioc( + self, ioc, ioc_type, scanner_name, description="", malicious=False + ) -> None: if not ioc: return - if ioc_type == 'url': + if ioc_type == "url": if validators.ipv4(self.extract(ioc).domain): - self.process_ioc(self.extract(ioc).domain, 'ip', scanner_name, description, malicious) + self.process_ioc( + self.extract(ioc).domain, "ip", scanner_name, description, malicious + ) else: - self.process_ioc(self.extract(ioc).registered_domain, 'domain', scanner_name, description, malicious) + self.process_ioc( + self.extract(ioc).registered_domain, + "domain", + scanner_name, + description, + malicious, + ) if not validators.url(ioc): logging.warning(f"{ioc} is not a valid url") return - elif ioc_type == 'ip': + elif ioc_type == "ip": try: ipaddress.ip_address(ioc) except ValueError: logging.warning(f"{ioc} is not a valid IP") return - elif ioc_type == 'domain': + elif ioc_type == "domain": if not validators.domain(ioc): logging.warning(f"{ioc} is not a valid domain") return - elif ioc_type == 'email': + elif ioc_type == "email": if not validators.email(ioc): logging.warning(f"{ioc} is not a valid email") return if malicious: - self.iocs.append({'ioc': ioc, 'ioc_type': ioc_type, 'scanner': scanner_name, 'description': description, - 'malicious': True}) + self.iocs.append( + { + "ioc": ioc, + "ioc_type": ioc_type, + "scanner": scanner_name, + "description": description, + "malicious": True, + } + ) else: - self.iocs.append({'ioc': ioc, 'ioc_type': ioc_type, 'scanner': scanner_name, 'description': description}) + self.iocs.append( + { + "ioc": ioc, + "ioc_type": ioc_type, + "scanner": scanner_name, + "description": description, + } + ) - def add_iocs(self, ioc, ioc_type, description='', malicious=False) -> None: + def add_iocs(self, ioc, ioc_type, description="", malicious=False) -> None: """Adds ioc to the iocs. :param ioc: The IOC or list of IOCs to be added. All iocs must be of the same type. Must be type String or Bytes. :param ioc_type: Must be one of md5, sha1, sha256, domain, url, email, ip, either as string or type object (e.g. self.type.domain). @@ -670,9 +733,11 @@ def add_iocs(self, ioc, ioc_type, description='', malicious=False) -> None: Malware Command and Control. Should not be used solely for determining maliciousness since testing values may be present. """ try: - accepted_iocs = ['md5', 'sha1', 'sha256', 'domain', 'url', 'email', 'ip'] + accepted_iocs = ["md5", "sha1", "sha256", "domain", "url", "email", "ip"] if ioc_type not in accepted_iocs: - logging.warning(f"{ioc_type} not in accepted range. Acceptable ioc types are: {accepted_iocs}") + logging.warning( + f"{ioc_type} not in accepted range. 
Acceptable ioc types are: {accepted_iocs}" + ) return if isinstance(ioc, list): for i in ioc: @@ -680,17 +745,31 @@ def add_iocs(self, ioc, ioc_type, description='', malicious=False) -> None: i = i.decode() if not isinstance(i, str): logging.warning( - f"Could not process {i} from {self.name}: Type {type(i)} is not type Bytes or String") + f"Could not process {i} from {self.name}: Type {type(i)} is not type Bytes or String" + ) continue - self.process_ioc(i, ioc_type, self.name, description=description, malicious=malicious) + self.process_ioc( + i, + ioc_type, + self.name, + description=description, + malicious=malicious, + ) else: if isinstance(ioc, bytes): ioc = ioc.decode() if not isinstance(ioc, str): logging.warning( - f"Could not process {ioc} from {self.name}: Type {type(ioc)} is not type Bytes or String") + f"Could not process {ioc} from {self.name}: Type {type(ioc)} is not type Bytes or String" + ) return - self.process_ioc(ioc, ioc_type, self.name, description=description, malicious=malicious) + self.process_ioc( + ioc, + ioc_type, + self.name, + description=description, + malicious=malicious, + ) except Exception as e: logging.error(f"Failed to add {ioc} from {self.name}: {e}") @@ -707,7 +786,7 @@ def chunk_string(s, chunk=1024 * 16) -> Generator[bytes, None, None]: s = bytes(s) for c in range(0, len(s), chunk): - yield s[c:c + chunk] + yield s[c : c + chunk] def format_event(metadata: dict) -> str: @@ -730,12 +809,12 @@ def format_event(metadata: dict) -> str: def visit(path, key, value): if isinstance(value, (bytes, bytearray)): - value = str(value, encoding='UTF-8', errors='replace') + value = str(value, encoding="UTF-8", errors="replace") return key, value remap1 = iterutils.remap(metadata, visit=visit) remap2 = iterutils.remap( remap1, - lambda p, k, v: v != '' and v != [] and v != {} and v is not None, + lambda p, k, v: v != "" and v != [] and v != {} and v is not None, ) return json.dumps(remap2) diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py index c23ddd58..274759dd 100644 --- a/src/python/strelka/tests/__init__.py +++ b/src/python/strelka/tests/__init__.py @@ -1,14 +1,11 @@ import datetime from pathlib import Path + from strelka.strelka import File def run_test_scan( - mocker, - scan_class, - fixture_path=None, - options=None, - backend_cfg=None + mocker, scan_class, fixture_path=None, options=None, backend_cfg=None ): if options is None: options = {} diff --git a/src/python/strelka/tests/test_distribute.py b/src/python/strelka/tests/test_distribute.py index fc83087f..de4425af 100644 --- a/src/python/strelka/tests/test_distribute.py +++ b/src/python/strelka/tests/test_distribute.py @@ -1,11 +1,11 @@ import os import time import uuid -import yaml - from pathlib import Path from unittest import TestCase, mock +import yaml + from strelka import strelka @@ -105,7 +105,7 @@ def test_distribute(mocker): backend = strelka.Backend(backend_cfg, coordinator=None) with open( - Path(Path(__file__).parent / f"../tests/fixtures/test.html"), "rb" + Path(Path(__file__).parent / "../tests/fixtures/test.html"), "rb" ) as test_file: data = test_file.read() file = strelka.File(data=data) diff --git a/src/python/strelka/tests/test_required_for_scanner.py b/src/python/strelka/tests/test_required_for_scanner.py index 230290b1..fc99382a 100644 --- a/src/python/strelka/tests/test_required_for_scanner.py +++ b/src/python/strelka/tests/test_required_for_scanner.py @@ -1,7 +1,7 @@ import warnings -from pathlib import Path from os import walk from os.path 
import isfile, join +from pathlib import Path def test_required_for_scanner(mocker): diff --git a/src/python/strelka/tests/test_scan_capa.py b/src/python/strelka/tests/test_scan_capa.py index b69a76dd..b56116fd 100644 --- a/src/python/strelka/tests/test_scan_capa.py +++ b/src/python/strelka/tests/test_scan_capa.py @@ -1,7 +1,7 @@ from pathlib import Path -from pytest_unordered import unordered from unittest import TestCase, mock +from pytest_unordered import unordered from strelka.scanners.scan_capa import ScanCapa as ScanUnderTest from strelka.tests import run_test_scan @@ -24,7 +24,7 @@ def test_scan_capa_dotnet(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.exe", - options={"scanner_timeout": 20} + options={"scanner_timeout": 20}, ) TestCase.maxDiff = None @@ -49,7 +49,7 @@ def test_scan_capa_elf(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.elf", - options={"scanner_timeout": 20} + options={"scanner_timeout": 20}, ) TestCase.maxDiff = None @@ -65,13 +65,15 @@ def test_scan_capa_pe_xor(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": [], - "matches": unordered([ - "encode data using XOR", - "contains PDB path", - "contain a resource (.rsrc) section", - "parse PE header", - "contain loop", - ]), + "matches": unordered( + [ + "encode data using XOR", + "contains PDB path", + "contain a resource (.rsrc) section", + "parse PE header", + "contain loop", + ] + ), "mitre_ids": unordered(["T1129", "T1027"]), "mitre_techniques": unordered( [ @@ -85,7 +87,7 @@ def test_scan_capa_pe_xor(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_xor.exe", - options={"scanner_timeout": 20} + options={"scanner_timeout": 20}, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_delay.py b/src/python/strelka/tests/test_scan_delay.py index dbd06121..3e1e237c 100644 --- a/src/python/strelka/tests/test_scan_delay.py +++ b/src/python/strelka/tests/test_scan_delay.py @@ -13,9 +13,7 @@ def test_scan_delay(mocker): test_scan_event = {"elapsed": mock.ANY, "flags": ["timed_out"]} scanner_event = run_test_scan( - mocker=mocker, - scan_class=ScanUnderTest, - options={"scanner_timeout": 1} + mocker=mocker, scan_class=ScanUnderTest, options={"scanner_timeout": 1} ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_email.py b/src/python/strelka/tests/test_scan_email.py index e5371298..22f67b1a 100644 --- a/src/python/strelka/tests/test_scan_email.py +++ b/src/python/strelka/tests/test_scan_email.py @@ -1,7 +1,7 @@ from pathlib import Path -from pytest_unordered import unordered from unittest import TestCase, mock +from pytest_unordered import unordered from strelka.scanners.scan_email import ScanEmail as ScanUnderTest from strelka.tests import run_test_scan diff --git a/src/python/strelka/tests/test_scan_encrypted_doc.py b/src/python/strelka/tests/test_scan_encrypted_doc.py index e77cb981..98c1584d 100644 --- a/src/python/strelka/tests/test_scan_encrypted_doc.py +++ b/src/python/strelka/tests/test_scan_encrypted_doc.py @@ -14,14 +14,19 @@ def test_scan_encrypted_doc(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": ["cracked_by_wordlist"], - "cracked_password": b"Password1!" 
+ "cracked_password": b"Password1!", } scanner_event = run_test_scan( mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password.doc", - options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "helpers/test_passwords.dat"))} + options={ + "log_pws": True, + "password_file": str( + Path(Path(__file__).parent / "helpers/test_passwords.dat") + ), + }, ) TestCase.maxDiff = None @@ -37,14 +42,19 @@ def test_scan_encrypted_docx(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": ["cracked_by_wordlist"], - "cracked_password": b"Password1!" + "cracked_password": b"Password1!", } scanner_event = run_test_scan( mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password.docx", - options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "helpers/test_passwords.dat"))} + options={ + "log_pws": True, + "password_file": str( + Path(Path(__file__).parent / "helpers/test_passwords.dat") + ), + }, ) TestCase.maxDiff = None @@ -60,14 +70,20 @@ def test_scan_encrypted_doc_brute(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": ["cracked_by_incremental"], - "cracked_password": b"aaa" + "cracked_password": b"aaa", } scanner_event = run_test_scan( mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password_brute.doc", - options={"scanner_timeout": 120, "log_pws": True, "brute_force": True, "min_length": 1, "max_length": 3} + options={ + "scanner_timeout": 120, + "log_pws": True, + "brute_force": True, + "min_length": 1, + "max_length": 3, + }, ) TestCase.maxDiff = None @@ -83,14 +99,20 @@ def test_scan_encrypted_docx_brute(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": ["cracked_by_incremental"], - "cracked_password": b"aaa" + "cracked_password": b"aaa", } scanner_event = run_test_scan( mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password_brute.docx", - options={"scanner_timeout": 120, "log_pws": True, "brute_force": True, "min_length": 1, "max_length": 3} + options={ + "scanner_timeout": 120, + "log_pws": True, + "brute_force": True, + "min_length": 1, + "max_length": 3, + }, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_exception.py b/src/python/strelka/tests/test_scan_exception.py index 4dcaf5e4..1a39d328 100644 --- a/src/python/strelka/tests/test_scan_exception.py +++ b/src/python/strelka/tests/test_scan_exception.py @@ -16,10 +16,7 @@ def test_scan_exception(mocker): "exception": mock.ANY, } - scanner_event = run_test_scan( - mocker=mocker, - scan_class=ScanUnderTest - ) + scanner_event = run_test_scan(mocker=mocker, scan_class=ScanUnderTest) TestCase.maxDiff = None TestCase().assertDictEqual(test_scan_event, scanner_event) diff --git a/src/python/strelka/tests/test_scan_html.py b/src/python/strelka/tests/test_scan_html.py index 763b00f1..0171ce62 100644 --- a/src/python/strelka/tests/test_scan_html.py +++ b/src/python/strelka/tests/test_scan_html.py @@ -79,11 +79,13 @@ def test_scan_html_max_hyperlinks(mocker): }, "title": "Sample HTML File", "hyperlinks_count": 7, - "hyperlinks": ['https://www.example.com', - 'https://www.example2.com', - 'https://www.example3.com', - 'https://www.example.com/downloads/example.pdf', - 'https://www.example.com/images/example.jpg'], + "hyperlinks": [ + "https://www.example.com", + "https://www.example2.com", + "https://www.example3.com", + "https://www.example.com/downloads/example.pdf", + 
"https://www.example.com/images/example.jpg", + ], "forms": [], "frames": [], "inputs": [], @@ -95,9 +97,11 @@ def test_scan_html_max_hyperlinks(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_hyperlinks.html", - options={"max_hyperlinks": MAX_SIZE_OPTION} + options={"max_hyperlinks": MAX_SIZE_OPTION}, ) TestCase.maxDiff = None - TestCase().assertLessEqual(len(test_scan_event['hyperlinks']), MAX_SIZE_OPTION) - TestCase().assertTrue(test_scan_event["hyperlinks_count"], scanner_event["hyperlinks_count"]) + TestCase().assertLessEqual(len(test_scan_event["hyperlinks"]), MAX_SIZE_OPTION) + TestCase().assertTrue( + test_scan_event["hyperlinks_count"], scanner_event["hyperlinks_count"] + ) diff --git a/src/python/strelka/tests/test_scan_iso.py b/src/python/strelka/tests/test_scan_iso.py index db5e7879..d35aeae9 100644 --- a/src/python/strelka/tests/test_scan_iso.py +++ b/src/python/strelka/tests/test_scan_iso.py @@ -1,4 +1,3 @@ -import datetime from pathlib import Path from unittest import TestCase, mock diff --git a/src/python/strelka/tests/test_scan_pcap.py b/src/python/strelka/tests/test_scan_pcap.py index c2eb50de..5ed6d4c4 100644 --- a/src/python/strelka/tests/test_scan_pcap.py +++ b/src/python/strelka/tests/test_scan_pcap.py @@ -1,7 +1,7 @@ from pathlib import Path -from pytest_unordered import unordered from unittest import TestCase, mock +from pytest_unordered import unordered from strelka.scanners.scan_pcap import ScanPcap as ScanUnderTest from strelka.tests import run_test_scan @@ -84,7 +84,7 @@ def test_scan_pcap(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.pcap", - options={"scanner_timeout": 20} + options={"scanner_timeout": 20}, ) TestCase.maxDiff = None @@ -169,7 +169,7 @@ def test_scan_pcap_ng(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.pcapng", - options={"scanner_timeout": 20} + options={"scanner_timeout": 20}, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_pe.py b/src/python/strelka/tests/test_scan_pe.py index 8e363f92..99de82b8 100644 --- a/src/python/strelka/tests/test_scan_pe.py +++ b/src/python/strelka/tests/test_scan_pe.py @@ -1,7 +1,7 @@ from pathlib import Path -from pytest_unordered import unordered from unittest import TestCase, mock +from pytest_unordered import unordered from strelka.scanners.scan_pe import ScanPe as ScanUnderTest from strelka.tests import run_test_scan diff --git a/src/python/strelka/tests/test_scan_png_eof.py b/src/python/strelka/tests/test_scan_png_eof.py index 1f1a5966..a8cd3f83 100644 --- a/src/python/strelka/tests/test_scan_png_eof.py +++ b/src/python/strelka/tests/test_scan_png_eof.py @@ -34,10 +34,7 @@ def test_scan_png_eof_normal(mocker): Failure: Unable to load file or sample event fails to match. """ - test_scan_event = { - "elapsed": mock.ANY, - "flags": ["no_trailer"] - } + test_scan_event = {"elapsed": mock.ANY, "flags": ["no_trailer"]} scanner_event = run_test_scan( mocker=mocker, @@ -55,10 +52,7 @@ def test_scan_png_eof_no_iend(mocker): Failure: Unable to load file or sample event fails to match. 
""" - test_scan_event = { - "elapsed": mock.ANY, - "flags": ["no_iend_chunk"] - } + test_scan_event = {"elapsed": mock.ANY, "flags": ["no_iend_chunk"]} scanner_event = run_test_scan( mocker=mocker, diff --git a/src/python/strelka/tests/test_scan_rar.py b/src/python/strelka/tests/test_scan_rar.py index 72b5c803..1be94e30 100644 --- a/src/python/strelka/tests/test_scan_rar.py +++ b/src/python/strelka/tests/test_scan_rar.py @@ -1,4 +1,3 @@ -import datetime from pathlib import Path from unittest import TestCase, mock diff --git a/src/python/strelka/tests/test_scan_seven_zip.py b/src/python/strelka/tests/test_scan_seven_zip.py index 6c9c8610..b38ff81e 100644 --- a/src/python/strelka/tests/test_scan_seven_zip.py +++ b/src/python/strelka/tests/test_scan_seven_zip.py @@ -85,7 +85,7 @@ def test_scan_sevenzip_wordlist(mocker): ], "hidden_dirs": ["hidden"], "meta": {"7zip_version": "22.01"}, - "cracked_password": b"password" + "cracked_password": b"password", } scanner_event = run_test_scan( @@ -138,7 +138,7 @@ def test_scan_sevenzip_wordlist_filenames(mocker): ], "hidden_dirs": ["hidden"], "meta": {"7zip_version": "22.01"}, - "cracked_password": b"password" + "cracked_password": b"password", } scanner_event = run_test_scan( @@ -169,7 +169,7 @@ def test_scan_sevenzip_nocrack_filenames(mocker): "total": {"files": 0, "extracted": 0}, "files": [], "hidden_dirs": [], - "meta": {"7zip_version": "22.01"} + "meta": {"7zip_version": "22.01"}, } scanner_event = run_test_scan( diff --git a/src/python/strelka/tests/test_scan_tlsh.py b/src/python/strelka/tests/test_scan_tlsh.py index 99d73add..1137ffc9 100644 --- a/src/python/strelka/tests/test_scan_tlsh.py +++ b/src/python/strelka/tests/test_scan_tlsh.py @@ -20,7 +20,7 @@ def test_scan_tlsh(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.txt", - options={"location": str(Path(Path(__file__).parent / "fixtures/test.yaml"))} + options={"location": str(Path(Path(__file__).parent / "fixtures/test.yaml"))}, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_vhd.py b/src/python/strelka/tests/test_scan_vhd.py index ada1dc4f..f63c57e1 100644 --- a/src/python/strelka/tests/test_scan_vhd.py +++ b/src/python/strelka/tests/test_scan_vhd.py @@ -69,8 +69,6 @@ def test_scan_vhdx(mocker): Failure: Unable to load file or sample event fails to match. 
""" - fixture_path = "fixtures/test.vhdx" - test_scan_event = { "elapsed": mock.ANY, "flags": [], diff --git a/src/python/strelka/tests/test_scan_x509.py b/src/python/strelka/tests/test_scan_x509.py index e0145f58..e73ed16c 100644 --- a/src/python/strelka/tests/test_scan_x509.py +++ b/src/python/strelka/tests/test_scan_x509.py @@ -28,7 +28,7 @@ def test_scan_x509_pem(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.pem", - options={"type": "pem"} + options={"type": "pem"}, ) TestCase.maxDiff = None @@ -58,7 +58,7 @@ def test_scan_x509_der(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.der", - options={"type": "der"} + options={"type": "der"}, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_xl4ma.py b/src/python/strelka/tests/test_scan_xl4ma.py index d6ca983c..965a9379 100644 --- a/src/python/strelka/tests/test_scan_xl4ma.py +++ b/src/python/strelka/tests/test_scan_xl4ma.py @@ -1,7 +1,7 @@ from pathlib import Path from unittest import TestCase, mock -from pytest_unordered import unordered +from pytest_unordered import unordered from strelka.scanners.scan_xl4ma import ScanXl4ma as ScanUnderTest from strelka.tests import run_test_scan @@ -15,7 +15,15 @@ def test_scan_xl4ma(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": [], - "decoded": unordered(['3', 'user', 'clean.xls', 'None', "https://www.example.com/path/to/resource"]), + "decoded": unordered( + [ + "3", + "user", + "clean.xls", + "None", + "https://www.example.com/path/to/resource", + ] + ), "iocs": ["https://www.example.com/path/to/resource"], } diff --git a/src/python/strelka/tests/test_scan_yara.py b/src/python/strelka/tests/test_scan_yara.py index a2284a41..9a55f1b7 100644 --- a/src/python/strelka/tests/test_scan_yara.py +++ b/src/python/strelka/tests/test_scan_yara.py @@ -23,7 +23,7 @@ def test_scan_yara(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test.txt", - options={"location": str(Path(Path(__file__).parent / "fixtures/test.yara"))} + options={"location": str(Path(Path(__file__).parent / "fixtures/test.yara"))}, ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests_configuration/test_scanner_assignment.py b/src/python/strelka/tests_configuration/test_scanner_assignment.py index c530dc25..6c9ae04d 100644 --- a/src/python/strelka/tests_configuration/test_scanner_assignment.py +++ b/src/python/strelka/tests_configuration/test_scanner_assignment.py @@ -1,10 +1,11 @@ +import os from pathlib import Path -from unittest import TestCase, mock +from unittest import TestCase -import os import pytest import redis import yaml + from strelka import strelka # Scanners that apply to all files (*) are not included @@ -40,7 +41,12 @@ "test.json": ["ScanJson"], "test.lnk": ["ScanExiftool", "ScanLNK"], "test.macho": ["ScanMacho"], - "test.msi": ['ScanExiftool', 'ScanMsi', 'ScanOle', 'ScanVba'], # TODO: Needs CDF subtype + "test.msi": [ + "ScanExiftool", + "ScanMsi", + "ScanOle", + "ScanVba", + ], # TODO: Needs CDF subtype "test.pcap": ["ScanPcap"], "test.pcapng": [], "test.pdf": ["ScanExiftool", "ScanPdf"], @@ -64,7 +70,7 @@ "test.vhdx": ["ScanVhd"], "test.webp": ["ScanExiftool", "ScanLsb", "ScanNf", "ScanOcr", "ScanQr"], "test.xar": ["ScanLibarchive"], - "test.xls": ['ScanExiftool', 'ScanOle', 'ScanVba', 'ScanXl4ma'], + "test.xls": ["ScanExiftool", "ScanOle", "ScanVba", "ScanXl4ma"], "test.xml": ["ScanXml"], "test.xz": ["ScanLzma"], 
"test.yara": ["ScanUrl"], diff --git a/src/python/strelka/tests_configuration/test_taste.py b/src/python/strelka/tests_configuration/test_taste.py index c8a3575b..acbdf2d4 100644 --- a/src/python/strelka/tests_configuration/test_taste.py +++ b/src/python/strelka/tests_configuration/test_taste.py @@ -1,18 +1,20 @@ import os +from pathlib import Path +from unittest import TestCase + +import pytest import redis import yaml -import pytest - -from pathlib import Path -from unittest import TestCase, mock from strelka import strelka - taste_expectations: dict = { "test.7z": {"mime": ["application/x-7z-compressed"], "yara": ["_7zip_file"]}, "test.b64": {"mime": ["text/plain"], "yara": []}, # FIXME: No file-specific match - "test.bat": {"mime": ["text/x-msdos-batch"], "yara": []}, # FIXME: Not in backend.cfg + "test.bat": { + "mime": ["text/x-msdos-batch"], + "yara": [], + }, # FIXME: Not in backend.cfg "test.bz2": {"mime": ["application/x-bzip2"], "yara": ["bzip2_file"]}, "test.cpio": {"mime": ["application/x-cpio"], "yara": []}, "test.deb": { @@ -47,7 +49,10 @@ "yara": ["olecf_file"], }, # TODO: CDF format needs subtypes "test.pcap": {"mime": ["application/vnd.tcpdump.pcap"], "yara": ["pcap_file"]}, - "test.pcapng": {"mime": ["application/octet-stream"], "yara": []}, # FIXME: pcapng_file broken + "test.pcapng": { + "mime": ["application/octet-stream"], + "yara": [], + }, # FIXME: pcapng_file broken "test.pdf": {"mime": ["application/pdf"], "yara": ["pdf_file"]}, "test.pem": {"mime": ["text/plain"], "yara": ["x509_pem_file"]}, "test.plist": {"mime": ["text/xml"], "yara": ["plist_file", "xml_file"]}, @@ -56,13 +61,19 @@ "test.tar": {"mime": ["application/x-tar"], "yara": ["tar_file"]}, "test.txt": {"mime": ["text/plain"], "yara": []}, "test.txt.asc": {"mime": ["text/PGP"], "yara": ["pgp_file"]}, - "test.txt.gpg": {"mime": ["application/octet-stream"], "yara": []}, # FIXME: Need binary PGP yara signature + "test.txt.gpg": { + "mime": ["application/octet-stream"], + "yara": [], + }, # FIXME: Need binary PGP yara signature "test.url": {"mime": ["text/plain"], "yara": []}, "test.vhd": {"mime": ["application/octet-stream"], "yara": ["vhd_file"]}, "test.vhdx": {"mime": ["application/octet-stream"], "yara": ["vhdx_file"]}, "test.webp": {"mime": ["image/webp"], "yara": []}, "test.xar": {"mime": ["application/x-xar"], "yara": ["xar_file"]}, - "test.xls": {"mime": ["application/vnd.ms-excel"], "yara": ["excel4_file", "olecf_file"]}, + "test.xls": { + "mime": ["application/vnd.ms-excel"], + "yara": ["excel4_file", "olecf_file"], + }, "test.xml": {"mime": ["text/xml"], "yara": ["xml_file"]}, "test.xz": {"mime": ["application/x-xz"], "yara": ["xz_file"]}, "test.yara": {"mime": ["text/plain"], "yara": []}, @@ -85,22 +96,34 @@ "mime": ["application/json"], "yara": ["browser_manifest", "json_file"], }, - "test_password.7z": {"mime": ["application/x-7z-compressed"], "yara": ["_7zip_file"]}, + "test_password.7z": { + "mime": ["application/x-7z-compressed"], + "yara": ["_7zip_file"], + }, "test_password.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, "test_password.docx": { "mime": ["application/encrypted"], "yara": ["encrypted_word_document", "olecf_file"], }, - "test_password_brute.7z": {"mime": ["application/x-7z-compressed"], "yara": ["_7zip_file"]}, + "test_password_brute.7z": { + "mime": ["application/x-7z-compressed"], + "yara": ["_7zip_file"], + }, "test_password_brute.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, "test_password_brute.docx": { "mime": 
["application/encrypted"], "yara": ["encrypted_word_document", "olecf_file"], }, - "test_password_filenames.7z": {"mime": ["application/x-7z-compressed"], "yara": ["_7zip_file"]}, + "test_password_filenames.7z": { + "mime": ["application/x-7z-compressed"], + "yara": ["_7zip_file"], + }, "test_pe.b64": {"mime": ["text/plain"], "yara": ["base64_pe"]}, "test_pe_object.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, - "test_pe_object_classic.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_pe_object_classic.doc": { + "mime": ["application/msword"], + "yara": ["olecf_file"], + }, "test_pe_overlay.bmp": {"mime": ["image/bmp"], "yara": ["bmp_file"]}, "test_pe_overlay.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, "test_pe_overlay.png": {"mime": ["image/png"], "yara": ["png_file"]}, @@ -134,7 +157,9 @@ } -@pytest.mark.parametrize("fixture_path,expected", [(k, v) for k, v in taste_expectations.items()]) +@pytest.mark.parametrize( + "fixture_path,expected", [(k, v) for k, v in taste_expectations.items()] +) def test_fixture_taste_output(fixture_path, expected) -> None: """ Pass: All test fixtures match the given yara and mime matches. @@ -144,7 +169,9 @@ def test_fixture_taste_output(fixture_path, expected) -> None: if os.path.exists("/etc/strelka/backend.yaml"): backend_cfg_path: str = "/etc/strelka/backend.yaml" else: - backend_cfg_path: str = Path(Path(__file__).parent / "../../../../configs/python/backend/backend.yaml") + backend_cfg_path: str = Path( + Path(__file__).parent / "../../../../configs/python/backend/backend.yaml" + ) with open(backend_cfg_path, "r") as f: backend_cfg = yaml.safe_load(f.read()) @@ -174,4 +201,8 @@ def test_taste_required() -> None: ) for test_fixture in test_fixtures: - TestCase().assertIn(os.path.basename(test_fixture), taste_expectations.keys(), msg="Fixture does not have a taste expectation") + TestCase().assertIn( + os.path.basename(test_fixture), + taste_expectations.keys(), + msg="Fixture does not have a taste expectation", + )