diff --git a/README.md b/README.md
index a9782bb62b..34c742fa58 100644
--- a/README.md
+++ b/README.md
@@ -383,6 +383,7 @@ For details, see [Configuration](https://www.blacklanternsecurity.com/bbot/Stabl
- [List of Modules](https://www.blacklanternsecurity.com/bbot/Stable/modules/list_of_modules)
- [Nuclei](https://www.blacklanternsecurity.com/bbot/Stable/modules/nuclei)
- [Custom YARA Rules](https://www.blacklanternsecurity.com/bbot/Stable/modules/custom_yara_rules)
+ - [Lightfuzz](https://www.blacklanternsecurity.com/bbot/Stable/modules/lightfuzz)
- **Misc**
- [Contribution](https://www.blacklanternsecurity.com/bbot/Stable/contribution)
- [Release History](https://www.blacklanternsecurity.com/bbot/Stable/release_history)
diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 2c4718844c..aa7715e71c 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -9,9 +9,9 @@
import ipaddress
import traceback
-from copy import copy
from pathlib import Path
from typing import Optional
+from copy import copy, deepcopy
from contextlib import suppress
from radixtarget import RadixTarget
from pydantic import BaseModel, field_validator
@@ -40,6 +40,7 @@
validators,
get_file_extension,
)
+from bbot.core.helpers.web.envelopes import BaseEnvelope
log = logging.getLogger("bbot.core.event")
@@ -592,6 +593,10 @@ def parent(self, parent):
elif not self._dummy:
log.warning(f"Tried to set invalid parent on {self}: (got: {repr(parent)} ({type(parent)}))")
+ @property
+ def children(self):
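+ # base Events have no children; subclasses (e.g. WEB_PARAMETER below) override this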
+ return []
+
@property
def parent_id(self):
parent_id = getattr(self.get_parent(), "id", None)
@@ -646,6 +651,13 @@ def get_parents(self, omit=False, include_self=False):
e = parent
return parents
+ def clone(self):
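+ """Return a shallow copy of this event with a fresh UUID (used to spawn per-subparam child events)."""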
+ # Create a shallow copy of the event first
+ cloned_event = copy(self)
+ # Re-assign a new UUID
+ cloned_event._uuid = uuid.uuid4()
+ return cloned_event
+
def _host(self):
return ""
@@ -827,7 +839,13 @@ def json(self, mode="json", siem_friendly=False):
j["discovery_path"] = self.discovery_path
j["parent_chain"] = self.parent_chain
+ # parameter envelopes
+ parameter_envelopes = getattr(self, "envelopes", None)
+ if parameter_envelopes is not None:
+ j["envelopes"] = parameter_envelopes.to_dict()
+
# normalize non-primitive python objects
for k, v in list(j.items()):
if k == "data":
continue
@@ -1327,12 +1345,56 @@ class URL_HINT(URL_UNVERIFIED):
class WEB_PARAMETER(DictHostEvent):
+ @property
+ def children(self):
+ # if we have any subparams, spawn a child WEB_PARAMETER for each one beyond the first
+ children = []
+ envelopes = getattr(self, "envelopes", None)
+ if envelopes is not None:
+ subparams = sorted(envelopes.get_subparams())
+
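+ # subparams are assumed to be (name, value) pairs; the first one is kept for this event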
+ if envelopes.selected_subparam is None:
+ current_subparam = subparams[0]
+ envelopes.selected_subparam = current_subparam[0]
+ if len(subparams) > 1:
+ for subparam, _ in subparams[1:]:
+ clone = self.clone()
+ clone.envelopes = deepcopy(envelopes)
+ clone.envelopes.selected_subparam = subparam
+ clone.parent = self
+ children.append(clone)
+ return children
+
+ def sanitize_data(self, data):
+ original_value = data.get("original_value", None)
+ if original_value is not None:
+ try:
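+ # BaseEnvelope.detect() unwraps layered encodings around the parameter value
+ # (e.g. hex/base64/URL-encoding, JSON/XML) so later modules can fuzz the innermost data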
+ envelopes = BaseEnvelope.detect(original_value)
+ setattr(self, "envelopes", envelopes)
+ except ValueError as e:
+ log.verbose(f"Error detecting envelopes for {self}: {e}")
+ return data
+
def _data_id(self):
- # dedupe by url:name:param_type
+ # dedupe by url:name:param_type:subparam
url = self.data.get("url", "")
name = self.data.get("name", "")
param_type = self.data.get("type", "")
- return f"{url}:{name}:{param_type}"
+ envelopes = getattr(self, "envelopes", "")
+ subparam = getattr(envelopes, "selected_subparam", "")
+
+ return f"{url}:{name}:{param_type}:{subparam}"
+
+ def _outgoing_dedup_hash(self, event):
+ # envelopes live as an attribute on the event (set in sanitize_data), not in
+ # event.data, so hash the selected subparam the same way _data_id does
+ envelopes = getattr(event, "envelopes", None)
+ return hash(
+ (
+ str(event.host),
+ event.data["url"],
+ event.data.get("name", ""),
+ event.data.get("type", ""),
+ getattr(envelopes, "selected_subparam", ""),
+ )
+ )
def _url(self):
return self.data["url"]
@@ -1768,7 +1830,6 @@ def make_event(
data = net.network_address
event_class = globals().get(event_type, DefaultEvent)
-
return event_class(
data,
event_type=event_type,
@@ -1828,7 +1889,6 @@ def event_from_json(j, siem_friendly=False):
resolved_hosts = j.get("resolved_hosts", [])
event._resolved_hosts = set(resolved_hosts)
-
event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
event.scope_distance = j["scope_distance"]
parent_id = j.get("parent", None)
diff --git a/bbot/core/helpers/diff.py b/bbot/core/helpers/diff.py
index 1ea5de0e02..64c1b1e6a5 100644
--- a/bbot/core/helpers/diff.py
+++ b/bbot/core/helpers/diff.py
@@ -15,22 +15,24 @@ def __init__(
parent_helper,
method="GET",
data=None,
+ json=None,
allow_redirects=False,
include_cache_buster=True,
headers=None,
cookies=None,
- timeout=15,
+ timeout=10,
):
self.parent_helper = parent_helper
self.baseline_url = baseline_url
self.include_cache_buster = include_cache_buster
self.method = method
self.data = data
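+ # optional JSON request body, forwarded to the baseline and comparison requests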
+ self.json = json
self.allow_redirects = allow_redirects
self._baselined = False
self.headers = headers
self.cookies = cookies
- self.timeout = 15
+ self.timeout = timeout
@staticmethod
def merge_dictionaries(headers1, headers2):
@@ -53,12 +55,13 @@ async def _baseline(self):
follow_redirects=self.allow_redirects,
method=self.method,
data=self.data,
+ json=self.json,
headers=self.headers,
cookies=self.cookies,
retries=2,
timeout=self.timeout,
)
- await self.parent_helper.sleep(1)
+ await self.parent_helper.sleep(0.5)
# put random parameters in URL, headers, and cookies
get_params = {self.parent_helper.rand_string(6): self.parent_helper.rand_string(6)}
@@ -76,12 +79,12 @@ async def _baseline(self):
follow_redirects=self.allow_redirects,
method=self.method,
data=self.data,
+ json=self.json,
retries=2,
timeout=self.timeout,
)
self.baseline = baseline_1
-
if baseline_1 is None or baseline_2 is None:
log.debug("HTTP error while establishing baseline, aborting")
raise HttpCompareError(
@@ -90,6 +93,7 @@ async def _baseline(self):
if baseline_1.status_code != baseline_2.status_code:
log.debug("Status code not stable during baseline, aborting")
raise HttpCompareError("Can't get baseline from source URL")
+
try:
baseline_1_json = xmltodict.parse(baseline_1.text)
baseline_2_json = xmltodict.parse(baseline_2.text)
@@ -105,11 +109,9 @@ async def _baseline(self):
for k in ddiff.keys():
for x in list(ddiff[k]):
- log.debug(f"Added {k} filter for path: {x.path()}")
self.ddiff_filters.append(x.path())
self.baseline_json = baseline_1_json
-
self.baseline_ignore_headers = [
h.lower()
for h in [
@@ -167,7 +169,6 @@ def compare_body(self, content_1, content_2):
if len(ddiff.keys()) == 0:
return True
else:
- log.debug(ddiff)
return False
async def compare(
@@ -178,6 +179,7 @@ async def compare(
check_reflection=False,
method="GET",
data=None,
+ json=None,
allow_redirects=False,
timeout=None,
):
@@ -208,6 +210,7 @@ async def compare(
follow_redirects=allow_redirects,
method=method,
data=data,
+ json=json,
timeout=timeout,
)
diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py
index 78ccf67155..e863723801 100644
--- a/bbot/core/helpers/helper.py
+++ b/bbot/core/helpers/helper.py
@@ -12,6 +12,7 @@
from .regex import RegexHelper
from .wordcloud import WordCloud
from .interactsh import Interactsh
+from .yara_helper import YaraHelper
from .depsinstaller import DepsInstaller
from .async_helpers import get_event_loop
@@ -85,6 +86,7 @@ def __init__(self, preset):
self._cloud = None
self.re = RegexHelper(self)
+ self.yara = YaraHelper(self)
self._dns = None
self._web = None
self.config_aware_validators = self.validators.Validators(self)
@@ -129,7 +131,8 @@ def http_compare(
cookies=None,
method="GET",
data=None,
- timeout=15,
+ json=None,
+ timeout=10,
):
return HttpCompare(
url,
@@ -141,6 +144,7 @@ def http_compare(
timeout=timeout,
method=method,
data=data,
+ json=json,
)
def temp_filename(self, extension=None):
diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py
index 92c9e523fd..6eda16e4da 100644
--- a/bbot/core/helpers/misc.py
+++ b/bbot/core/helpers/misc.py
@@ -2,6 +2,7 @@
import sys
import copy
import json
+import math
import random
import string
import asyncio
@@ -9,6 +10,7 @@
import ipaddress
import regex as re
import subprocess as sp
+
from pathlib import Path
from contextlib import suppress
from unidecode import unidecode # noqa F401
@@ -797,17 +799,14 @@ def recursive_decode(data, max_depth=5):
return data
-rand_pool = string.ascii_lowercase
-rand_pool_digits = rand_pool + string.digits
-
-
-def rand_string(length=10, digits=True):
+def rand_string(length=10, digits=True, numeric_only=False):
"""
Generates a random string of specified length.
Args:
length (int, optional): The length of the random string. Defaults to 10.
digits (bool, optional): Whether to include digits in the string. Defaults to True.
+ numeric_only (bool, optional): Whether to generate a numeric-only string. Defaults to False.
Returns:
str: A random string of the specified length.
@@ -819,11 +818,17 @@ def rand_string(length=10, digits=True):
'ap4rsdtg5iw7ey7y3oa5'
>>> rand_string(30, digits=False)
'xdmyxtglqfzqktngkesyulwbfrihva'
+ >>> rand_string(15, numeric_only=True)
+ '934857349857395'
"""
- pool = rand_pool
- if digits:
- pool = rand_pool_digits
- return "".join([random.choice(pool) for _ in range(int(length))])
+ if numeric_only:
+ pool = string.digits
+ elif digits:
+ pool = string.ascii_lowercase + string.digits
+ else:
+ pool = string.ascii_lowercase
+
+ return "".join(random.choice(pool) for _ in range(length))
def truncate_string(s, n):
@@ -885,7 +890,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
xml_data (str): XML-formatted string containing elements.
Returns:
- set: A set of tuples containing the tags and their corresponding text values present in the XML object.
+ set: A set of tuples containing the tags and their corresponding sanitized text values present in the XML object.
Raises:
Returns an empty set if ParseError occurs.
@@ -907,7 +912,10 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
while stack:
current_element = stack.pop()
if validate_parameter(current_element.tag, compare_mode):
- tag_value_pairs.add((current_element.tag, current_element.text))
+ # Sanitize the text value
+ text_value = current_element.text.strip() if current_element.text else None
+ sanitized_value = quote(text_value, safe="") if text_value else None
+ tag_value_pairs.add((current_element.tag, sanitized_value))
for child in current_element:
stack.append(child)
return tag_value_pairs
@@ -921,6 +929,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
"getparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="},
"postparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="},
"cookie": {chr(c) for c in range(33, 127) if chr(c) not in '()<>@,;:"/[]?={} \t'},
+ "bodyjson": set(chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="),
}
@@ -1874,6 +1883,7 @@ def make_table(rows, header, **kwargs):
| row2 | row2 |
+-----------+-----------+
"""
+
from tabulate import tabulate
# fix IndexError: list index out of range
@@ -2772,6 +2782,21 @@ def clean_dict(d, *key_names, fuzzy=False, exclude_keys=None, _prev_key=None):
return d
+def calculate_entropy(data):
+ """Calculate the Shannon entropy of a byte sequence"""
+ if not data:
+ return 0.0
+ frequency = {}
+ for byte in data:
+ if byte in frequency:
+ frequency[byte] += 1
+ else:
+ frequency[byte] = 1
+ data_len = len(data)
+ entropy = -sum((count / data_len) * math.log2(count / data_len) for count in frequency.values())
+ return entropy
+
+
top_ports_cache = None
@@ -2825,3 +2850,15 @@ def clean_requirement(req_string):
dist = distribution("bbot")
return [clean_requirement(r) for r in dist.requires]
+
+
+def is_printable(s):
+ """
+ Check if a string is printable
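+
+ Examples:
+ >>> is_printable("hello, world!")
+ True
+ >>> is_printable("hello" + chr(0x07))
+ False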
+ """
+ if not isinstance(s, str):
+ raise ValueError(f"Expected a string, got {type(s)}")
+
+ # Allow tab/newline/carriage return, but reject other control characters
+ # that break display/printing; set() dedupes characters before checking
+ return all(ord(c) >= 32 or c in "\t\n\r" for c in set(s))
diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py
index 6a0a27456e..122ff79af2 100644
--- a/bbot/core/helpers/regexes.py
+++ b/bbot/core/helpers/regexes.py
@@ -114,27 +114,64 @@
# For use with excavate parameters extractor
input_tag_regex = re.compile(
- r"]+?name=[\"\']?([\.$\w]+)[\"\']?(?:[^>]*?value=[\"\']([=+\/\w]*)[\"\'])?[^>]*>"
+ r"]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
)
-jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=")
-jquery_post_regex = re.compile(r"\$.post\([\'\"].+[\'\"].+\{(.+)\}")
+input_tag_regex2 = re.compile(
+ r"]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
+)
+input_tag_novalue_regex = re.compile(r"<input(?![^>]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>")
+# jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=")
+# jquery_get_regex = re.compile(r"\$.get\([\'\"].+[\'\"].+\{(.+)\}")
+# jquery_post_regex = re.compile(r"\$.post\([\'\"].+[\'\"].+\{(.+)\}")
a_tag_regex = re.compile(r"<a[^>]*href=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)")
img_tag_regex = re.compile(r"<img[^>]*src=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)")
get_form_regex = re.compile(
- r"
-
+