Merge pull request #306 from ryanohoro/check_scanners

Check Scanners At Startup, In Config Tests
target · Feb 2, 2023 · 5883766 · 5883766
2 parents 7241cbb + ca8c2d6
commit 5883766
Show file tree

Hide file tree

Showing 34 changed files with 51 additions and 83 deletions.
diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml
@@ -555,11 +555,6 @@ scanners:
       priority: 5
       options:
         limit: 1000
-  'ScanRuby':
-    - positive:
-        flavors:
-          - 'text/x-ruby'
-      priority: 5
   'ScanSevenZip':
     - positive:
         flavors:

diff --git a/src/python/strelka/cstructs/bits/structure.py b/src/python/strelka/cstructs/bits/structure.py
@@ -180,7 +180,6 @@ def __len__(self):
         return len(self.getData())
 
     def pack(self, format, data, field=None):
-
         if field:
             addressField = self.findAddressFieldFor(field)
             if (addressField is not None) and (data is None):
@@ -293,7 +292,6 @@ def pack(self, format, data, field=None):
         return pack(format, data)
 
     def unpack(self, format, data, dataClassOrCode=None, field=None):
-
         if field:
             addressField = self.findAddressFieldFor(field)
             if addressField is not None:
@@ -462,7 +460,6 @@ def calcPackSize(self, format, data, field=None):
         return calcsize(format)
 
     def calcUnpackSize(self, format, data, field=None):
-
         # void specifier
         if format[:1] == "_":
             return 0

diff --git a/src/python/strelka/scanners/common/password_cracking.py b/src/python/strelka/scanners/common/password_cracking.py
@@ -8,7 +8,6 @@
 
 
 def convert_unit_john(jtr_number: str) -> float:
-
     if jtr_number.endswith("K"):
         return float(jtr_number[:-1]) * 1000
     elif jtr_number.endswith("M"):

diff --git a/src/python/strelka/scanners/scan_antiword.py b/src/python/strelka/scanners/scan_antiword.py
@@ -26,6 +26,5 @@ def scan(self, data, file, options, expire_at):
             ).communicate()
 
             if stdout:
-
                 # Send extracted file back to Strelka
                 self.emit_file(stdout, name="text")
diff --git a/src/python/strelka/scanners/scan_base64_pe.py b/src/python/strelka/scanners/scan_base64_pe.py
@@ -9,7 +9,6 @@ class ScanBase64PE(strelka.Scanner):
     """Decodes base64-encoded file."""
 
     def scan(self, data, file, options, expire_at):
-
         with io.BytesIO(data) as encoded_file:
             extract_data = b""
 
@@ -20,6 +19,5 @@ def scan(self, data, file, options, expire_at):
                 self.flags.append("not_decodable_from_base64")
 
             if extract_data:
-
                 # Send extracted file back to Strelka
                 self.emit_file(extract_data)
diff --git a/src/python/strelka/scanners/scan_bits.py b/src/python/strelka/scanners/scan_bits.py
@@ -64,7 +64,6 @@ def parse_qmgr10_job(self, file_entries, job_data):
         if len(job_data) < 128:
             return None
         try:
-
             # Because it can be expensive to parse a JOB structure if the data is not valid,
             # do a simple check to see if the job name length is valid
             name_length = struct.unpack_from("<L", job_data, 32)[0]
@@ -295,7 +294,6 @@ def parse(self):
                 for file in v:
                     file_dict = {}
                     for k1, v1 in file.items():
-
                         # Map the transaction attribute name, skip empty, unmapped, or invalid values
                         t_alias = self.FILE_MAP.get(k1)
                         if not t_alias:

diff --git a/src/python/strelka/scanners/scan_dmg.py b/src/python/strelka/scanners/scan_dmg.py
@@ -51,7 +51,6 @@ def extract_7zip(self, data, tmp_dir, scanner_timeout, expire_at, file_limit):
 
             try:
                 with tempfile.TemporaryDirectory() as tmp_extract:
-
                     try:
                         (stdout, stderr) = subprocess.Popen(
                             ["7zz", "x", tmp_data.name, f"-o{tmp_extract}"],
@@ -94,7 +93,6 @@ def get_all_items(root, exclude=None):
                         try:
                             relname = os.path.relpath(name, tmp_extract)
                             with open(name, "rb") as extracted_file:
-
                                 # Send extracted file back to Strelka
                                 self.emit_file(extracted_file.read(), name=relname)
 
@@ -150,7 +148,6 @@ def parse_7zip_stdout(self, output_7zip, file_limit):
             )
 
             def parse_file_modes(file_modes):
-
                 file_mode_list = []
 
                 for file_mode in file_modes:
@@ -171,7 +168,6 @@ def parse_file_modes(file_modes):
 
             for output_line in output_lines:
                 if output_line:
-
                     # Properties section
                     match = regex_mode_properties.match(output_line)
                     if match:
@@ -195,7 +191,6 @@ def parse_file_modes(file_modes):
 
                     # Header section
                     if not mode:
-
                         match = regex_7zip_version.match(output_line)
                         if match:
                             version = regex_7zip_version.match(output_line).group(1)
@@ -204,11 +199,9 @@ def parse_file_modes(file_modes):
                             continue
 
                     elif mode == "properties":
-
                         # Collect specific properties
                         match = regex_property.match(output_line)
                         if match:
-
                             if match.group(1) == "Label":
                                 partition["label"] = match.group(2)
                             elif match.group(1) == "Path":

diff --git a/src/python/strelka/scanners/scan_docx.py b/src/python/strelka/scanners/scan_docx.py
@@ -19,7 +19,6 @@ class ScanDocx(strelka.Scanner):
     def scan(self, data, file, options, expire_at):
         extract_text = options.get("extract_text", False)
         with io.BytesIO(data) as docx_io:
-
             try:
                 docx_doc = docx.Document(docx_io)
                 self.event["author"] = docx_doc.core_properties.author
@@ -53,7 +52,6 @@ def scan(self, data, file, options, expire_at):
                 self.event["image_count"] = 0
 
                 for paragraph in docx_doc.paragraphs:
-
                     soup = BeautifulSoup(paragraph.paragraph_format.element.xml, "xml")
                     color_list = soup.select("color")
 
@@ -77,7 +75,6 @@ def scan(self, data, file, options, expire_at):
                     self.event["white_text_in_doc"] = True
 
                 if extract_text:
-
                     text = ""
                     for paragraph in docx_doc.paragraphs:
                         text += f"{paragraph.text}\n"

diff --git a/src/python/strelka/scanners/scan_email.py b/src/python/strelka/scanners/scan_email.py
@@ -14,7 +14,6 @@ def scan(self, data, file, options, expire_at):
         self.event["total"] = {"attachments": 0, "extracted": 0}
 
         try:
-
             # Open and parse email byte string
             # If fail to open, return.
             try:

diff --git a/src/python/strelka/scanners/scan_encrypted_doc.py b/src/python/strelka/scanners/scan_encrypted_doc.py
@@ -49,7 +49,6 @@ def crack_word(
                 return
 
         if b"0 password hashes cracked" in stdout:
-
             with tempfile.NamedTemporaryFile(dir=tmp_dir) as tmp_data:
                 tmp_data.write(office2john)
                 tmp_data.flush()
@@ -117,7 +116,6 @@ class ScanEncryptedDoc(strelka.Scanner):
     """
 
     def scan(self, data, file, options, expire_at):
-
         jtr_path = options.get("jtr_path", "/jtr/")
         tmp_directory = options.get("tmp_file_directory", "/tmp/")
         password_file = options.get("password_file", "/etc/strelka/passwords.dat")
@@ -127,7 +125,6 @@ def scan(self, data, file, options, expire_at):
         max_length = options.get("max_length", 7)
 
         with io.BytesIO(data) as doc_io:
-
             msoff_doc = msoffcrypto.OfficeFile(doc_io)
             output_doc = io.BytesIO()
             if extracted_pw := crack_word(

diff --git a/src/python/strelka/scanners/scan_encrypted_zip.py b/src/python/strelka/scanners/scan_encrypted_zip.py
@@ -49,7 +49,6 @@ def crack_zip(
                 return
 
         if b"0 password hashes cracked" in stdout:
-
             with tempfile.NamedTemporaryFile(dir=tmp_dir) as tmp_data:
                 tmp_data.write(zip2john)
                 tmp_data.flush()
@@ -111,7 +110,6 @@ class ScanEncryptedZip(strelka.Scanner):
     """
 
     def scan(self, data, file, options, expire_at):
-
         jtr_path = options.get("jtr_path", "/jtr/")
         tmp_directory = options.get("tmp_file_directory", "/tmp/")
         file_limit = options.get("limit", 1000)
@@ -125,10 +123,8 @@ def scan(self, data, file, options, expire_at):
 
         with io.BytesIO(data) as zip_io:
             try:
-
                 is_aes = False
                 with pyzipper.ZipFile(zip_io) as zip_obj:
-
                     file_list = zip_obj.filelist  # .filelist
                     for file_list_item in file_list:
                         if not file_list_item.is_dir():
@@ -140,7 +136,6 @@ def scan(self, data, file, options, expire_at):
                 with pyzipper.AESZipFile(zip_io) if is_aes else pyzipper.ZipFile(
                     zip_io
                 ) as zip_obj:
-
                     file_list = zip_obj.filelist  # .filelist
                     for file_list_item in file_list:
                         if not file_list_item.is_dir():
@@ -175,7 +170,6 @@ def scan(self, data, file, options, expire_at):
                                 )
 
                                 if extract_data:
-
                                     # Send extracted file back to Strelka
                                     self.emit_file(
                                         extract_data, name=file_item.filename

diff --git a/src/python/strelka/scanners/scan_exception.py b/src/python/strelka/scanners/scan_exception.py
@@ -18,5 +18,4 @@ def init(self):
         pass
 
     def scan(self, data, file, options, expire_at):
-
         raise Exception("Scanner Exception")
diff --git a/src/python/strelka/scanners/scan_html.py b/src/python/strelka/scanners/scan_html.py
@@ -93,7 +93,7 @@ def scan(self, data, file, options, expire_at):
             scripts = soup.find_all("script")
             self.event["total"]["scripts"] = len(scripts)
             self.event.setdefault("scripts", [])
-            for (index, script) in enumerate(scripts):
+            for index, script in enumerate(scripts):
                 script_flavors = [
                     script.get("language", "").lower(),
                     script.get("type", "").lower(),

diff --git a/src/python/strelka/scanners/scan_jpeg.py b/src/python/strelka/scanners/scan_jpeg.py
@@ -10,7 +10,6 @@ class ScanJpeg(strelka.Scanner):
     """
 
     def scan(self, data, file, options, expire_at):
-
         offset = 0
 
         # Skip check for length with these markers
@@ -35,7 +34,6 @@ def scan(self, data, file, options, expire_at):
         # Skip SOI
         offset += 2
         while True:
-
             marker = data[offset : offset + 2]
 
             # Marker must start with 0xff

diff --git a/src/python/strelka/scanners/scan_json.py b/src/python/strelka/scanners/scan_json.py
@@ -25,7 +25,7 @@ def _get_keys(self, variable):
             variable: Variable to recursively parse.
         """
         if isinstance(variable, dict):
-            for (key, value) in variable.items():
+            for key, value in variable.items():
                 if key not in self.event["keys"]:
                     self.event["keys"].append(key)
                 self._get_keys(self, value)

diff --git a/src/python/strelka/scanners/scan_libarchive.py b/src/python/strelka/scanners/scan_libarchive.py
@@ -26,7 +26,6 @@ def scan(self, data, file, options, expire_at):
                         self.event["total"]["files"] += 1
 
             with libarchive.memory_reader(data) as archive:
-
                 for entry in archive:
                     if entry.isfile:
                         if self.event["total"]["extracted"] >= file_limit:

diff --git a/src/python/strelka/scanners/scan_macho.py b/src/python/strelka/scanners/scan_macho.py
@@ -195,7 +195,6 @@ def scan(self, data, file, options, expire_at):
                     tmp_data.flush()
 
                     with open(tmp_data.name, "rb") as f:
-
                         # Send extracted file back to Strelka
                         self.emit_file(f.read(), name=f"binary_{r}")
 

diff --git a/src/python/strelka/scanners/scan_ocr.py b/src/python/strelka/scanners/scan_ocr.py
@@ -41,7 +41,6 @@ def scan(self, data, file, options, expire_at):
                         ocr_file = tess_txt.read()
 
                         if ocr_file:
-
                             if split_words:
                                 self.event["text"] = ocr_file.split()
                             else:
@@ -52,7 +51,6 @@ def scan(self, data, file, options, expire_at):
                                 )
 
                             if extract_text:
-
                                 # Send extracted file back to Strelka
                                 self.emit_file(ocr_file, name="text")
 

diff --git a/src/python/strelka/scanners/scan_pcap.py b/src/python/strelka/scanners/scan_pcap.py
@@ -37,7 +37,6 @@ def scan(self, data, file, options, expire_at):
             tmp_data.seek(0)
 
             with tempfile.TemporaryDirectory() as tmp_extract:
-
                 try:
                     (stdout, stderr) = subprocess.Popen(
                         [
@@ -57,14 +56,12 @@ def scan(self, data, file, options, expire_at):
                         with open(
                             os.path.join(tmp_extract, "files.log"), "r"
                         ) as json_file:
-
                             # files.log is one JSON object per line, convert to array
                             file_events = json.loads(
                                 "[" + ",".join(json_file.read().splitlines()) + "]"
                             )
 
                             for file_event in file_events:
-
                                 if self.event["total"]["extracted"] >= file_limit:
                                     self.flags.append("pcap_file_limit_error")
                                     break
@@ -96,6 +93,5 @@ def scan(self, data, file, options, expire_at):
     def upload(self, name, expire_at):
         """Send extracted file to coordinator"""
         with open(name, "rb") as extracted_file:
-
             # Send extracted file back to Strelka
             self.emit_file(extracted_file.read())
diff --git a/src/python/strelka/scanners/scan_pgp.py b/src/python/strelka/scanners/scan_pgp.py
@@ -42,7 +42,6 @@ def scan(self, data, file, options, expire_at):
             self.flags.append("pgpdump_error")
 
     def parse_pgpdump(self, data):
-
         pgpdump_data = None
 
         try:

diff --git a/src/python/strelka/scanners/scan_png_eof.py b/src/python/strelka/scanners/scan_png_eof.py
@@ -8,7 +8,6 @@ class ScanPngEof(strelka.Scanner):
     """
 
     def scan(self, data, file, options, expire_at):
-
         # PNG IEND chunk
         png_iend = b"\x00\x00\x00\x00\x49\x45\x4e\x44\xae\x42\x60\x82"
 
@@ -18,7 +17,6 @@ def scan(self, data, file, options, expire_at):
         else:
             # Locate the first occurance of the IEND chunk, the end of PNG file
             if -1 != (trailer_index := data.find(png_iend)):
-
                 trailer_index = trailer_index + len(png_iend)
                 self.event["trailer_index"] = trailer_index
                 self.event["PNG_EOF"] = data[trailer_index:]

diff --git a/src/python/strelka/scanners/scan_rar.py b/src/python/strelka/scanners/scan_rar.py
@@ -119,7 +119,6 @@ def scan(self, data, file, options, expire_at):
                                     self.flags.append("no_password_match_found")
 
                                 if extract_data:
-
                                     # Send extracted file back to Strelka
                                     self.emit_file(
                                         extract_data, name=f"{file_info.filename}"

diff --git a/src/python/strelka/scanners/scan_rpm.py b/src/python/strelka/scanners/scan_rpm.py
@@ -23,7 +23,7 @@ def scan(self, data, file, options, expire_at):
             try:
                 with rpmfile.open(tmp_data.name) as rpm_obj:
                     extract_name = ""
-                    for (key, value) in rpm_obj.headers.items():
+                    for key, value in rpm_obj.headers.items():
                         if key == "arch":
                             self.event["architecture"] = value
                         elif key == "archive_compression":

diff --git a/src/python/strelka/scanners/scan_rtf.py b/src/python/strelka/scanners/scan_rtf.py
@@ -27,17 +27,14 @@ def scan(self, data, file, options, expire_at):
             index = rtf.server.index(rtf_object)
 
             if rtf_object.is_package:
-
                 # Send extracted file back to Strelka
                 self.emit_file(rtf_object.olepkgdata, name=rtf_object.filename)
 
             elif rtf_object.is_ole:
-
                 # Send extracted file back to Strelka
                 self.emit_file(rtf_object.oledata, name=f"rtf_object_{index}")
 
             else:
-
                 # Send extracted file back to Strelka
                 self.emit_file(rtf_object.rawdata, name=f"rtf_object_{index}")
diff --git a/src/python/strelka/scanners/scan_email.py b/src/python/strelka/scanners/scan_email.py
@@ -14,7 +14,6 @@ def scan(self, data, file, options, expire_at):
         self.event["total"] = {"attachments": 0, "extracted": 0}
 
         try:
-
             # Open and parse email byte string
             # If fail to open, return.
             try:
diff --git a/src/python/strelka/scanners/scan_exception.py b/src/python/strelka/scanners/scan_exception.py
@@ -18,5 +18,4 @@ def init(self):
         pass
 
     def scan(self, data, file, options, expire_at):
-
         raise Exception("Scanner Exception")