diff --git a/doc/userguide/rules/datasets.rst b/doc/userguide/rules/datasets.rst index bf6ab9b1edd3..8299c3ff46b0 100644 --- a/doc/userguide/rules/datasets.rst +++ b/doc/userguide/rules/datasets.rst @@ -3,8 +3,8 @@ Datasets ======== -Using the ``dataset`` and ``datarep`` keyword it is possible to match on -large amounts of data against any sticky buffer. +Using the ``dataset``, ``datarep`` and ``datajson`` keywords it is possible +to match on large amounts of data against any sticky buffer. For example, to match against a DNS black list called ``dns-bl``:: @@ -145,6 +145,26 @@ reputation lists. A MD5 list, a SHA256 list, and a raw string (buffer) list. The rules will only match if the data is in the list and the reputation value is higher than 200. +datajson +~~~~~~~~ + +DataJSON allows matching data against a set and outputting the data attached to the matching +value in the event. + +Syntax:: + + datajson:<cmd>,<name>,<options>; + + datajson:<isset|isnotset>,<name> \ + [, type <string|md5|sha256|ipv4|ip>, load <file name>, memcap <size>, hashsize <size>, key <json_key>]; + +Example rules could look like:: + + alert http any any -> any any (msg:"IP match"; ip.dst; datajson:isset,bad_ips, type ip, load bad_ips.csv, key bad_ones; sid:8000001;) + +In this example, the match will occur if the destination IP is in the set and the +alert will have an ``alert.extra.bad_ones`` subobject that will contain the JSON +data associated with the value. Rule Reloads ------------ @@ -243,6 +263,44 @@ Syntax:: dataset-dump +datajson-add +~~~~~~~~~~~~ + +Unix Socket command to add data to a set. On success, the addition becomes +active instantly. 
+ +Syntax:: + + datajson-add <set name> <set type> <data> <json> +set name + Name of an already defined dataset +type + Data type: string, md5, sha256, ipv4, ip +data + Data to add in serialized form (base64 for string, hex notation for md5/sha256, string representation for ipv4/ip) + +Example adding 'google.com' to set 'myset':: + + datajson-add myset string Z29vZ2xlLmNvbQ== {"city":"Mountain View"} + +datajson-remove +~~~~~~~~~~~~~~~ + +Unix Socket command to remove data from a set. On success, the removal becomes +active instantly. + +Syntax:: + + datajson-remove <set name> <set type> <data> + +set name + Name of an already defined dataset +type + Data type: string, md5, sha256, ipv4, ip +data + Data to remove in serialized form (base64 for string, hex notation for md5/sha256, string representation for ipv4/ip) + File formats ------------ @@ -285,13 +343,30 @@ which when piped to ``base64 -d`` reveals its value:: datarep ~~~~~~~ -The datarep format follows the dataset, expect that there are 1 more CSV +The datarep format follows the dataset, except that there is 1 more CSV field: Syntax:: <data>,<value> + +datajson +~~~~~~~~ + +The datajson format follows the dataset, except that there is a comma +separator followed by a second field that must contain a valid JSON +object: + +Syntax:: + + <data>,<json> + +e.g. for ua-seen with type string:: + + TW96aWxsYS80LjAgKGNvbXBhdGlibGU7ICk=,{"agent": "Mozilla", "version": "4.0"} + + .. _datasets_file_locations: File Locations diff --git a/doc/userguide/rules/payload-keywords.rst b/doc/userguide/rules/payload-keywords.rst index 780ad111f7dc..ae013e7db227 100644 --- a/doc/userguide/rules/payload-keywords.rst +++ b/doc/userguide/rules/payload-keywords.rst @@ -774,6 +774,67 @@ qualities of pcre as well. These are: .. note:: The following characters must be escaped inside the content: ``;`` ``\`` ``"`` +PCRE extraction +~~~~~~~~~~~~~~~ + +It is possible to capture groups from the regular expression and log them into the +alert events. 
+ +There are 3 capabilities: + +* pkt: the extracted group is logged as a pkt variable in ``metadata.pktvars`` +* alert: the extracted group is logged to the ``alert.extra`` subobject +* flow: the extracted group is stored in a flow variable and ends up in the ``metadata.flowvars`` + +To use the feature, the parameters of the pcre keyword need to be extended. +After the regular pcre regex and options, add a comma separated list of variable names. +The prefix here is ``flow:``, ``pkt:`` or ``alert:`` and the names can contain special +characters now. The names map to the capturing substring expressions in order :: + + pcre:"/([a-z]+)\/[a-z]+\/(.+)\/(.+)\/changelog$/GUR, \ + flow:ua/ubuntu/repo,flow:ua/ubuntu/pkg/base, \ + flow:ua/ubuntu/pkg/version"; + +This would result in the alert event having something like :: + + "metadata": { + "flowvars": [ + {"ua/ubuntu/repo": "fr"}, + {"ua/ubuntu/pkg/base": "curl"}, + {"ua/ubuntu/pkg/version": "2.2.1"} + ] + } + +The other events on the same flow such as the ``flow`` one will +also have the flow vars. + +If this is not wanted, you can use the ``alert:`` construct to only +get the extracted data in the alert :: + + pcre:"/([a-z]+)\/[a-z]+\/(.+)\/(.+)\/changelog$/GUR, \ + alert:ua/ubuntu/repo,alert:ua/ubuntu/pkg/base, \ + alert:ua/ubuntu/pkg/version"; + +With that syntax, the result of the extraction will appear like :: + + "alert": { + "extra": { + "ua/ubuntu/repo": "fr", + "ua/ubuntu/pkg/base": "curl", + "ua/ubuntu/pkg/version": "2.2.1" + } + } + +The extraction scopes can be combined. + +It is also possible to extract a key/value pair in the ``pkt`` scope. +One capture would be the key, the second the value. The notation is similar to the last :: + + pcre:"/^([A-Z]+) (.*)\r\n/, pkt:key,pkt:value"; + +``key`` and ``value`` are simply hardcoded names to trigger the key/value extraction. +As a consequence, they can't be used as names for the variables. 
+ Suricata's modifiers ~~~~~~~~~~~~~~~~~~~~ diff --git a/etc/schema.json b/etc/schema.json index c6ac4d7a4132..900c189b3ad7 100644 --- a/etc/schema.json +++ b/etc/schema.json @@ -216,6 +216,11 @@ "xff": { "type": "string" }, + "extra": { + "type": "object", + "additionalProperties": true, + "description": "Extra data created by keywords such as datajson" + }, "metadata": { "type": "object", "properties": { @@ -2802,7 +2807,7 @@ "type": "string" } }, - "additionalProperties": false + "additionalProperties": true } }, "flowints": { diff --git a/python/suricata/sc/specs.py b/python/suricata/sc/specs.py index c7e045873303..58917c4ab9a0 100644 --- a/python/suricata/sc/specs.py +++ b/python/suricata/sc/specs.py @@ -194,6 +194,38 @@ "required": 1, }, ], + "datajson-add": [ + { + "name": "setname", + "required": 1, + }, + { + "name": "settype", + "required": 1, + }, + { + "name": "datavalue", + "required": 1, + }, + { + "name": "datajson", + "required": 1, + }, + ], + "datajson-remove": [ + { + "name": "setname", + "required": 1, + }, + { + "name": "settype", + "required": 1, + }, + { + "name": "datavalue", + "required": 1, + }, + ], "get-flow-stats-by-id": [ { "name": "flow_id", diff --git a/python/suricata/sc/suricatasc.py b/python/suricata/sc/suricatasc.py index fc07037c3d22..350c611b67c8 100644 --- a/python/suricata/sc/suricatasc.py +++ b/python/suricata/sc/suricatasc.py @@ -113,6 +113,8 @@ def __init__(self, sck_path, verbose=False): "memcap-show", "dataset-add", "dataset-remove", + "datajson-add", + "datajson-remove", "get-flow-stats-by-id", "dataset-clear", "dataset-lookup", @@ -218,6 +220,11 @@ def execute(self, command): cmd_specs = argsd[cmd] required_args_count = len([d["required"] for d in cmd_specs if d["required"] and not "val" in d]) arguments = dict() + # if all arguments are required in the command then we split at the count + # this way we can handle last argument containing space (datajson-add for example) + non_req_args_count = len([d for d in cmd_specs 
if not d["required"] or "val" in d]) + if non_req_args_count == 0: + full_cmd = command.split(maxsplit=required_args_count) for c, spec in enumerate(cmd_specs, 1): spec_type = str if "type" not in spec else spec["type"] if spec["required"]: diff --git a/src/Makefile.am b/src/Makefile.am index c2cf2dd93ab5..d64744d8ea6e 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -50,6 +50,7 @@ noinst_HEADERS = \ datasets.h \ datasets-ipv4.h \ datasets-ipv6.h \ + datasets-json.h \ datasets-md5.h \ datasets-reputation.h \ datasets-sha256.h \ @@ -102,6 +103,7 @@ noinst_HEADERS = \ detect-config.h \ detect-content.h \ detect-csum.h \ + detect-datajson.h \ detect-datarep.h \ detect-dataset.h \ detect-dce-iface.h \ @@ -667,6 +669,7 @@ libsuricata_c_a_SOURCES = \ detect-config.c \ detect-content.c \ detect-csum.c \ + detect-datajson.c \ detect-datarep.c \ detect-dataset.c \ detect-dce-iface.c \ diff --git a/src/datasets-ipv4.c b/src/datasets-ipv4.c index 67f8778fd2d6..1e6fc07d4973 100644 --- a/src/datasets-ipv4.c +++ b/src/datasets-ipv4.c @@ -56,3 +56,37 @@ uint32_t IPv4Hash(uint32_t hash_seed, void *s) void IPv4Free(void *s) { } + +int IPv4JsonSet(void *dst, void *src) +{ + IPv4TypeJson *src_s = src; + IPv4TypeJson *dst_s = dst; + memcpy(dst_s->ipv4, src_s->ipv4, sizeof(dst_s->ipv4)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + +bool IPv4JsonCompare(void *a, void *b) +{ + const IPv4TypeJson *as = a; + const IPv4TypeJson *bs = b; + + return (memcmp(as->ipv4, bs->ipv4, sizeof(as->ipv4)) == 0); +} + +uint32_t IPv4JsonHash(uint32_t hash_seed, void *s) +{ + const IPv4TypeJson *str = s; + return hashword((uint32_t *)str->ipv4, 1, hash_seed); +} + +// data stays in hash +void IPv4JsonFree(void *s) +{ + const IPv4TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-ipv4.h b/src/datasets-ipv4.h index 4a840e9aa631..3fc23ee2958e 100644 --- a/src/datasets-ipv4.h +++ b/src/datasets-ipv4.h @@ 
-25,15 +25,26 @@ #define SURICATA_DATASETS_IPV4_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct IPv4Type { uint8_t ipv4[4]; DataRepType rep; } IPv4Type; +typedef struct IPv4TypeJson { + uint8_t ipv4[4]; + DataJsonType json; +} IPv4TypeJson; + int IPv4Set(void *dst, void *src); bool IPv4Compare(void *a, void *b); uint32_t IPv4Hash(uint32_t hash_seed, void *s); void IPv4Free(void *s); +int IPv4JsonSet(void *dst, void *src); +bool IPv4JsonCompare(void *a, void *b); +uint32_t IPv4JsonHash(uint32_t hash_seed, void *s); +void IPv4JsonFree(void *s); + #endif /* SURICATA_DATASETS_IPV4_H */ diff --git a/src/datasets-ipv6.c b/src/datasets-ipv6.c index ac96374da7c7..61beb56979ba 100644 --- a/src/datasets-ipv6.c +++ b/src/datasets-ipv6.c @@ -56,3 +56,36 @@ uint32_t IPv6Hash(uint32_t hash_seed, void *s) void IPv6Free(void *s) { } + +int IPv6JsonSet(void *dst, void *src) +{ + IPv6TypeJson *src_s = src; + IPv6TypeJson *dst_s = dst; + memcpy(dst_s->ipv6, src_s->ipv6, sizeof(dst_s->ipv6)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + return 0; +} + +bool IPv6JsonCompare(void *a, void *b) +{ + const IPv6TypeJson *as = a; + const IPv6TypeJson *bs = b; + + return (memcmp(as->ipv6, bs->ipv6, sizeof(as->ipv6)) == 0); +} + +uint32_t IPv6JsonHash(uint32_t hash_seed, void *s) +{ + const IPv6TypeJson *str = s; + return hashword((uint32_t *)str->ipv6, 4, hash_seed); +} + +void IPv6JsonFree(void *s) +{ + const IPv6TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-ipv6.h b/src/datasets-ipv6.h index c75ad194d685..eeedd4121a80 100644 --- a/src/datasets-ipv6.h +++ b/src/datasets-ipv6.h @@ -31,9 +31,19 @@ typedef struct IPv6Type { DataRepType rep; } IPv6Type; +typedef struct IPv6TypeJson { + uint8_t ipv6[16]; + DataJsonType json; +} IPv6TypeJson; + int IPv6Set(void *dst, void *src); bool IPv6Compare(void *a, void *b); uint32_t IPv6Hash(uint32_t hash_seed, void *s); void 
IPv6Free(void *s); +int IPv6JsonSet(void *dst, void *src); +bool IPv6JsonCompare(void *a, void *b); +uint32_t IPv6JsonHash(uint32_t hash_seed, void *s); +void IPv6JsonFree(void *s); + #endif /* __DATASETS_IPV4_H__ */ diff --git a/src/datasets-json.h b/src/datasets-json.h new file mode 100644 index 000000000000..1a542edd4666 --- /dev/null +++ b/src/datasets-json.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef __DATASETS_JSON_H__ +#define __DATASETS_JSON_H__ + +#include + +typedef struct DataJsonType { + char *value; + size_t len; +} DataJsonType; + +typedef struct DataJsonResultType { + bool found; + DataJsonType json; +} DataJsonResultType; + +#endif /* __DATASETS_JSON_H__ */ diff --git a/src/datasets-md5.c b/src/datasets-md5.c index 28fd37d8304a..77a94f2ae1c3 100644 --- a/src/datasets-md5.c +++ b/src/datasets-md5.c @@ -57,3 +57,36 @@ uint32_t Md5StrHash(uint32_t hash_seed, void *s) void Md5StrFree(void *s) { } + +int Md5StrJsonSet(void *dst, void *src) +{ + Md5TypeJson *src_s = src; + Md5TypeJson *dst_s = dst; + memcpy(dst_s->md5, src_s->md5, sizeof(dst_s->md5)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + +bool Md5StrJsonCompare(void *a, void *b) +{ + const Md5TypeJson *as = a; + const Md5TypeJson *bs = b; + + return (memcmp(as->md5, bs->md5, sizeof(as->md5)) == 0); +} + +uint32_t Md5StrJsonHash(uint32_t hash_seed, void *s) +{ + const Md5TypeJson *str = s; + return hashword((uint32_t *)str->md5, sizeof(str->md5) / 4, hash_seed); +} + +// data stays in hash +void Md5StrJsonFree(void *s) +{ + const Md5TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-md5.h b/src/datasets-md5.h index 88c1ff1dfd3b..858ecf1b6ea6 100644 --- a/src/datasets-md5.h +++ b/src/datasets-md5.h @@ -25,15 +25,26 @@ #define SURICATA_DATASETS_MD5_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct Md5Type { uint8_t md5[16]; DataRepType rep; } Md5Type; +typedef struct Md5TypeJson { + uint8_t md5[16]; + DataJsonType json; +} Md5TypeJson; + int Md5StrSet(void *dst, void *src); bool Md5StrCompare(void *a, void *b); uint32_t Md5StrHash(uint32_t hash_seed, void *s); void Md5StrFree(void *s); +int Md5StrJsonSet(void *dst, void *src); +bool Md5StrJsonCompare(void *a, void *b); +uint32_t Md5StrJsonHash(uint32_t 
hash_seed, void *s); +void Md5StrJsonFree(void *s); + #endif /* SURICATA_DATASETS_MD5_H */ diff --git a/src/datasets-sha256.c b/src/datasets-sha256.c index 240939c08454..aa9f4df137c2 100644 --- a/src/datasets-sha256.c +++ b/src/datasets-sha256.c @@ -56,3 +56,36 @@ void Sha256StrFree(void *s) { // no dynamic data } + +int Sha256StrJsonSet(void *dst, void *src) +{ + Sha256TypeJson *src_s = src; + Sha256TypeJson *dst_s = dst; + memcpy(dst_s->sha256, src_s->sha256, sizeof(dst_s->sha256)); + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + return 0; +} + +bool Sha256StrJsonCompare(void *a, void *b) +{ + Sha256TypeJson *as = a; + Sha256TypeJson *bs = b; + + return (memcmp(as->sha256, bs->sha256, sizeof(as->sha256)) == 0); +} + +uint32_t Sha256StrJsonHash(uint32_t hash_seed, void *s) +{ + Sha256TypeJson *str = s; + return hashword((uint32_t *)str->sha256, sizeof(str->sha256) / 4, hash_seed); +} + +// data stays in hash +void Sha256StrJsonFree(void *s) +{ + const Sha256TypeJson *as = s; + if (as->json.value) { + SCFree(as->json.value); + } +} diff --git a/src/datasets-sha256.h b/src/datasets-sha256.h index 4f99b85a96e6..912a6b2e5b1f 100644 --- a/src/datasets-sha256.h +++ b/src/datasets-sha256.h @@ -25,15 +25,26 @@ #define SURICATA_DATASETS_SHA256_H #include "datasets-reputation.h" +#include "datasets-json.h" typedef struct Sha256Type { uint8_t sha256[32]; DataRepType rep; } Sha256Type; +typedef struct Sha256TypeJson { + uint8_t sha256[32]; + DataJsonType json; +} Sha256TypeJson; + int Sha256StrSet(void *dst, void *src); bool Sha256StrCompare(void *a, void *b); uint32_t Sha256StrHash(uint32_t hash_seed, void *s); void Sha256StrFree(void *s); +int Sha256StrJsonSet(void *dst, void *src); +bool Sha256StrJsonCompare(void *a, void *b); +uint32_t Sha256StrJsonHash(uint32_t hash_seed, void *s); +void Sha256StrJsonFree(void *s); + #endif /* SURICATA_DATASETS_SHA256_H */ diff --git a/src/datasets-string.c b/src/datasets-string.c index 
85fe864f52db..e00eefe5fa0b 100644 --- a/src/datasets-string.c +++ b/src/datasets-string.c @@ -103,3 +103,62 @@ void StringFree(void *s) StringType *str = s; SCFree(str->ptr); } + +int StringJsonAsBase64(const void *s, char *out, size_t out_size) +{ + const StringTypeJson *str = s; + + unsigned long len = Base64EncodeBufferSize(str->len); + uint8_t encoded_data[len]; + if (Base64Encode((unsigned char *)str->ptr, str->len, encoded_data, &len) != SC_BASE64_OK) + return 0; + + strlcpy(out, (const char *)encoded_data, out_size); + strlcat(out, "\n", out_size); + return strlen(out); +} + +int StringJsonSet(void *dst, void *src) +{ + StringTypeJson *src_s = src; + StringTypeJson *dst_s = dst; + SCLogDebug("dst %p src %p, src_s->ptr %p src_s->len %u", dst, src, src_s->ptr, src_s->len); + + dst_s->len = src_s->len; + dst_s->ptr = SCMalloc(dst_s->len); + BUG_ON(dst_s->ptr == NULL); + memcpy(dst_s->ptr, src_s->ptr, dst_s->len); + + dst_s->json.value = src_s->json.value; + dst_s->json.len = src_s->json.len; + + SCLogDebug("dst %p src %p, dst_s->ptr %p dst_s->len %u", dst, src, dst_s->ptr, dst_s->len); + return 0; +} + +bool StringJsonCompare(void *a, void *b) +{ + const StringTypeJson *as = a; + const StringTypeJson *bs = b; + + if (as->len != bs->len) + return false; + + return (memcmp(as->ptr, bs->ptr, as->len) == 0); +} + +uint32_t StringJsonHash(uint32_t hash_seed, void *s) +{ + StringTypeJson *str = s; + return hashlittle_safe(str->ptr, str->len, hash_seed); +} + +// base data stays in hash +void StringJsonFree(void *s) +{ + StringTypeJson *str = s; + SCFree(str->ptr); + if (str->json.value) { + SCFree(str->json.value); + } +} diff --git a/src/datasets-string.h b/src/datasets-string.h index 745754fc49ac..302937f01eb4 100644 --- a/src/datasets-string.h +++ b/src/datasets-string.h @@ -32,6 +32,12 @@ typedef struct StringType { uint8_t *ptr; } StringType; +typedef struct StringTypeJson { + uint32_t len; + DataJsonType json; + uint8_t *ptr; +} StringTypeJson; + int 
/* Count consecutive ASCII digits in in[pos..ins). */
static size_t IsFloatDigits(const char *in, size_t ins, size_t pos)
{
    size_t n = 0;
    while (pos + n < ins && in[pos + n] >= '0' && in[pos + n] <= '9')
        n++;
    return n;
}

/* Skip blanks, tabs, CR and LF starting at pos; returns the new position. */
static size_t IsFloatSkipSpace(const char *in, size_t ins, size_t pos)
{
    while (pos < ins &&
            (in[pos] == ' ' || in[pos] == '\t' || in[pos] == '\r' || in[pos] == '\n'))
        pos++;
    return pos;
}

/**
 * \brief Check whether the first \p ins bytes of \p in are one number.
 *
 * Accepted form: optional '-', digits, optional '.' followed by digits,
 * optional exponent ('e' or 'E', optional sign, digits). Surrounding
 * whitespace is tolerated; any other leftover byte makes the check fail, so
 * input such as "1abc" is rejected instead of being accepted on its numeric
 * prefix. Leading zeros are accepted.
 *
 * Only in[0..ins) is read, so the buffer does not have to be NUL terminated,
 * and the result does not depend on the process locale.
 *
 * \param in buffer to inspect, may be NULL
 * \param ins number of bytes of \p in to consider
 *
 * \retval true the whole input is an integer or a float
 * \retval false otherwise, including NULL or empty input
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL)
        return false;

    size_t pos = IsFloatSkipSpace(in, ins, 0);

    if (pos < ins && in[pos] == '-')
        pos++;

    /* integer part is mandatory */
    size_t digits = IsFloatDigits(in, ins, pos);
    if (digits == 0)
        return false;
    pos += digits;

    /* optional fraction: a '.' must be followed by at least one digit */
    if (pos < ins && in[pos] == '.') {
        pos++;
        digits = IsFloatDigits(in, ins, pos);
        if (digits == 0)
            return false;
        pos += digits;
    }

    /* optional exponent: sign is optional, digits are not */
    if (pos < ins && (in[pos] == 'e' || in[pos] == 'E')) {
        pos++;
        if (pos < ins && (in[pos] == '+' || in[pos] == '-'))
            pos++;
        digits = IsFloatDigits(in, ins, pos);
        if (digits == 0)
            return false;
        pos += digits;
    }

    /* nothing but whitespace may follow the number */
    return IsFloatSkipSpace(in, ins, pos) == ins;
}
+ So we need to exclude them from failure. */ + if (!IsFloat(in, ins) && !((in[0] == '"') && (in[ins - 1] == '"'))) { + SCLogWarning("dataset: Invalid json: %s: '%s'\n", jerror.text, in); + return -1; + } + } else { + json_decref(msg); + } + rep_out->len = ins; + rep_out->value = SCStrndup(in, ins); + if (rep_out->value == NULL) { + return -1; + } + return 0; +} + static int DatasetLoadIPv4(Dataset *set) { if (strlen(set->load) == 0) @@ -205,10 +249,10 @@ static int DatasetLoadIPv4(Dataset *set) /* list with rep data */ } else { line[strlen(line) - 1] = '\0'; - SCLogDebug("IPv4 with REP line: '%s'", line); - *r = '\0'; + SCLogDebug("IPv4 with REP line: '%s'", line); + struct in_addr in; if (inet_pton(AF_INET, line, &in) != 1) { FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line); @@ -239,6 +283,69 @@ static int DatasetLoadIPv4(Dataset *set) return 0; } +static int DatasetJsonLoadIPv4(Dataset *set) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; + if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { + fopen_mode = "a+"; + } + + FILE *fp = fopen(set->load, fopen_mode); + if (fp == NULL) { + SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + return -1; + } + + int add_ret; + uint32_t cnt = 0; + char line[1024]; + while (fgets(line, (int)sizeof(line), fp) != NULL) { + char *r = strchr(line, ','); + if (r == NULL) { + FatalErrorOnInit("datajson without separator %s/%s", set->name, set->load); + continue; + /* list with JSON data */ + } else { + line[strlen(line) - 1] = '\0'; + *r = '\0'; + + struct in_addr in; + if (inet_pton(AF_INET, line, &in) != 1) { + FatalErrorOnInit( + "datajson IPv4 parse failed %s/%s: %s", set->name, set->load, line); + continue; + } + + r++; + + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("bad json value for 
dataset %s/%s", set->name, set->load); + continue; + } + + add_ret = DatasetAddwJson(set, (const uint8_t *)&in.s_addr, 4, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + + cnt++; + } + } + THashConsolidateMemcap(set->hash); + + fclose(fp); + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6) { /* Checking IPv6 case */ @@ -335,7 +442,68 @@ static int DatasetLoadIPv6(Dataset *set) FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); continue; } + cnt++; + } + } + THashConsolidateMemcap(set->hash); + + fclose(fp); + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +static int DatasetJsonLoadIPv6(Dataset *set) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; + if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { + fopen_mode = "a+"; + } + + FILE *fp = fopen(set->load, fopen_mode); + if (fp == NULL) { + SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + return -1; + } + + int add_ret; + uint32_t cnt = 0; + char line[1024]; + while (fgets(line, (int)sizeof(line), fp) != NULL) { + char *r = strchr(line, ','); + if (r == NULL) { + FatalErrorOnInit("datajson without separator %s/%s", set->name, set->load); + /* list with JSON data */ + } else { + line[strlen(line) - 1] = '\0'; + SCLogDebug("IPv6 with JSON line: '%s'", line); + + *r = '\0'; + struct in6_addr in6; + int ret = ParseIpv6String(set, line, &in6); + if (ret < 0) { + FatalErrorOnInit("unable to parse IP address"); + continue; + } + + r++; + + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("bad json value for dataset %s/%s", set->name, 
set->load); + continue; + } + add_ret = DatasetAddwJson(set, (const uint8_t *)&in6.s6_addr, 16, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } cnt++; } } @@ -394,7 +562,7 @@ static int DatasetLoadMd5(Dataset *set) continue; } - DataRepType rep = { .value = 0}; + DataRepType rep = { .value = 0 }; if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) { FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); continue; @@ -420,6 +588,71 @@ static int DatasetLoadMd5(Dataset *set) return 0; } +static int DatasetJsonLoadMd5(Dataset *set) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; + if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { + fopen_mode = "a+"; + } + + FILE *fp = fopen(set->load, fopen_mode); + if (fp == NULL) { + SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + return -1; + } + + int add_ret; + uint32_t cnt = 0; + char line[1024]; + while (fgets(line, (int)sizeof(line), fp) != NULL) { + /* straight black/white list */ + if (strlen(line) == 33) { + FatalErrorOnInit("datajson without separator %s/%s", set->name, set->load); + continue; + /* list with json data */ + } else if (strlen(line) > 33 && line[32] == ',') { + line[strlen(line) - 1] = '\0'; + SCLogDebug("MD5 with JSON line: '%s'", line); + + uint8_t hash[16]; + if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) { + FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); + continue; + } + + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(line + 33, strlen(line) - 33, &json) < 0) { + FatalErrorOnInit("bad json for dataset %s/%s", set->name, set->load); + continue; + } + + SCLogDebug("json v:%s", json.value); + add_ret = DatasetAddwJson(set, hash, 16, &json); + if (add_ret < 0) { + 
FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + + cnt++; + + } else { + FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line); + continue; + } + } + THashConsolidateMemcap(set->hash); + + fclose(fp); + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + static int DatasetLoadSha256(Dataset *set) { if (strlen(set->load) == 0) @@ -490,6 +723,66 @@ static int DatasetLoadSha256(Dataset *set) return 0; } +static int DatasetJsonLoadSha256(Dataset *set) +{ + if (strlen(set->load) == 0) + return 0; + + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; + if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { + fopen_mode = "a+"; + } + + FILE *fp = fopen(set->load, fopen_mode); + if (fp == NULL) { + SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + return -1; + } + + int add_ret; + uint32_t cnt = 0; + char line[1024]; + while (fgets(line, (int)sizeof(line), fp) != NULL) { + /* straight black/white list */ + if (strlen(line) == 65) { + FatalErrorOnInit("datajson missing separator %s/%s", set->name, set->load); + continue; + } else if (strlen(line) > 65 && line[64] == ',') { + line[strlen(line) - 1] = '\0'; + SCLogDebug("SHA-256 with JSON line: '%s'", line); + + uint8_t hash[32]; + if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) { + FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); + continue; + } + + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(line + 65, strlen(line) - 65, &json) < 0) { + FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); + continue; + } + + SCLogDebug("json %s", json.value); + + add_ret = DatasetAddwJson(set, hash, 32, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { 
+ SCFree(json.value); + } + cnt++; + } + } + THashConsolidateMemcap(set->hash); + + fclose(fp); + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + static int DatasetLoadString(Dataset *set) { if (strlen(set->load) == 0) @@ -550,6 +843,7 @@ static int DatasetLoadString(Dataset *set) r++; SCLogDebug("r '%s'", r); + // coverity[alloc_strlen : FALSE] DataRepType rep = { .value = 0 }; if (ParseRepLine(r, strlen(r), &rep) < 0) { FatalErrorOnInit("die: bad rep"); @@ -573,32 +867,105 @@ static int DatasetLoadString(Dataset *set) return 0; } -extern bool g_system; - -enum DatasetGetPathType { - TYPE_STATE, - TYPE_LOAD, -}; - -static void DatasetGetPath(const char *in_path, - char *out_path, size_t out_size, enum DatasetGetPathType type) +static int DatasetJsonLoadString(Dataset *set) { - char path[PATH_MAX]; - struct stat st; + if (strlen(set->load) == 0) + return 0; - if (PathIsAbsolute(in_path)) { - strlcpy(path, in_path, sizeof(path)); - strlcpy(out_path, path, out_size); - return; + SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; + if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { + fopen_mode = "a+"; } - const char *data_dir = ConfigGetDataDirectory(); - if (stat(data_dir, &st) != 0) { - SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno)); - return; + FILE *fp = fopen(set->load, fopen_mode); + if (fp == NULL) { + SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + return -1; } - snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS + int add_ret; + uint32_t cnt = 0; + char line[1024]; + while (fgets(line, (int)sizeof(line), fp) != NULL) { + if (strlen(line) <= 1) + continue; + + char *r = strchr(line, ','); + if (r == NULL) { + FatalErrorOnInit("missing separator in datajson set %s/%s", set->name, set->load); + continue; + } else { + line[strlen(line) - 1] = '\0'; + SCLogDebug("line: '%s'", line); + + *r = '\0'; + + uint32_t 
decoded_size = Base64DecodeBufferSize(strlen(line)); + uint8_t decoded[decoded_size]; + uint32_t num_decoded = + Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded); + if (num_decoded == 0) { + FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load); + continue; + } + + r++; + SCLogDebug("r '%s'", r); + + // coverity[alloc_strlen : FALSE] + DataJsonType json = { .value = NULL, .len = 0 }; + if (ParseJsonLine(r, strlen(r), &json) < 0) { + FatalErrorOnInit("die: bad json"); + continue; + } + SCLogDebug("json %s", json.value); + + add_ret = DatasetAddwJson(set, (const uint8_t *)decoded, num_decoded, &json); + if (add_ret < 0) { + FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load); + continue; + } else if (add_ret == 0) { + SCFree(json.value); + } + cnt++; + + SCLogDebug("line with json %s, %s", line, r); + } + } + THashConsolidateMemcap(set->hash); + + fclose(fp); + SCLogConfig("dataset: %s loaded %u records", set->name, cnt); + return 0; +} + +extern bool g_system; + +enum DatasetGetPathType { + TYPE_STATE, + TYPE_LOAD, +}; + +static void DatasetGetPath( + const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type) +{ + char path[PATH_MAX]; + struct stat st; + + if (PathIsAbsolute(in_path)) { + strlcpy(path, in_path, sizeof(path)); + strlcpy(out_path, path, out_size); + return; + } + + const char *data_dir = ConfigGetDataDirectory(); + if (stat(data_dir, &st) != 0) { + SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno)); + return; + } + + snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS if (type == TYPE_LOAD) { if (stat(path, &st) != 0) { @@ -727,10 +1094,10 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, goto out_err; break; case DATASET_TYPE_IPV4: - set->hash = - THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash, IPv4Compare, - NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? 
memcap : default_memcap, - hashsize > 0 ? hashsize : default_hashsize); + set->hash = THashInit(cnf_name, sizeof(IPv4TypeJson), IPv4Set, IPv4Free, IPv4Hash, + IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); if (set->hash == NULL) goto out_err; if (DatasetLoadIPv4(set) < 0) @@ -772,6 +1139,135 @@ Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, return NULL; } +Dataset *DatasetJsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize) +{ + uint64_t default_memcap = 0; + uint32_t default_hashsize = 0; + if (strlen(name) > DATASET_NAME_MAX_LEN) { + return NULL; + } + + SCMutexLock(&sets_lock); + Dataset *set = DatasetSearchByName(name); + if (set) { + if (type != DATASET_TYPE_NOTSET && set->type != type) { + SCLogError("dataset %s already " + "exists and is of type %u", + set->name, set->type); + SCMutexUnlock(&sets_lock); + return NULL; + } + + if (load == NULL || strlen(load) == 0) { + // OK, rule keyword doesn't have to set state/load, + // even when yaml set has set it. 
+ } else { + if ((load == NULL && strlen(set->load) > 0) || + (load != NULL && strcmp(set->load, load) != 0)) { + SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load); + SCMutexUnlock(&sets_lock); + return NULL; + } + } + + SCMutexUnlock(&sets_lock); + return set; + } else { + if (type == DATASET_TYPE_NOTSET) { + SCLogError("dataset %s not defined", name); + goto out_err; + } + } + + set = DatasetAlloc(name); + if (set == NULL) { + goto out_err; + } + + strlcpy(set->name, name, sizeof(set->name)); + set->type = type; + if (load && strlen(load)) { + strlcpy(set->load, load, sizeof(set->load)); + SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load); + } + + char cnf_name[128]; + snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name); + + GetDefaultMemcap(&default_memcap, &default_hashsize); + switch (type) { + case DATASET_TYPE_MD5: + set->hash = THashInit(cnf_name, sizeof(Md5TypeJson), Md5StrJsonSet, Md5StrJsonFree, + Md5StrJsonHash, Md5StrJsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetJsonLoadMd5(set) < 0) + goto out_err; + break; + case DATASET_TYPE_STRING: + set->hash = THashInit(cnf_name, sizeof(StringTypeJson), StringJsonSet, StringJsonFree, + StringJsonHash, StringJsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetJsonLoadString(set) < 0) + goto out_err; + break; + case DATASET_TYPE_SHA256: + set->hash = THashInit(cnf_name, sizeof(Sha256TypeJson), Sha256StrJsonSet, + Sha256StrJsonFree, Sha256StrJsonHash, Sha256StrJsonCompare, NULL, NULL, + load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? 
hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetJsonLoadSha256(set) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV4: + set->hash = THashInit(cnf_name, sizeof(IPv4TypeJson), IPv4JsonSet, IPv4JsonFree, + IPv4JsonHash, IPv4JsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetJsonLoadIPv4(set) < 0) + goto out_err; + break; + case DATASET_TYPE_IPV6: + set->hash = THashInit(cnf_name, sizeof(IPv6TypeJson), IPv6JsonSet, IPv6JsonFree, + IPv6JsonHash, IPv6JsonCompare, NULL, NULL, load != NULL ? 1 : 0, + memcap > 0 ? memcap : default_memcap, + hashsize > 0 ? hashsize : default_hashsize); + if (set->hash == NULL) + goto out_err; + if (DatasetJsonLoadIPv6(set) < 0) + goto out_err; + break; + } + + SCLogDebug( + "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load); + + set->next = sets; + sets = set; + + SCMutexUnlock(&sets_lock); + return set; +out_err: + if (set) { + if (set->hash) { + THashShutdown(set->hash); + } + SCFree(set); + } + SCMutexUnlock(&sets_lock); + return NULL; +} + static bool DatasetIsStatic(const char *save, const char *load) { /* A set is static if it does not have any dynamic properties like @@ -1140,6 +1636,28 @@ static DataRepResultType DatasetLookupStringwRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupStringwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + StringTypeJson lookup = { + .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0 + }; + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + StringTypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + 
return rrep; +} + static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1182,6 +1700,30 @@ static DataRepResultType DatasetLookupIPv4wRep( return rrep; } +static DataJsonResultType DatasetLookupIPv4wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 4) + return rrep; + + IPv4TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv4, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv4TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1224,6 +1766,31 @@ static DataRepResultType DatasetLookupIPv6wRep( return rrep; } +static DataJsonResultType DatasetLookupIPv6wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + /* We can have IPv4 or IPv6 here due to the ip.src and ip.dst implementation */ + if (data_len != 16 && data_len != 4) + return rrep; + + IPv6TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv6, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + IPv6TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1266,6 +1833,30 @@ static DataRepResultType DatasetLookupMd5wRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupMd5wJson( + Dataset *set, const
uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 16) + return rrep; + + Md5TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.md5, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Md5TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1308,6 +1899,30 @@ static DataRepResultType DatasetLookupSha256wRep(Dataset *set, return rrep; } +static DataJsonResultType DatasetLookupSha256wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } }; + + if (set == NULL) + return rrep; + + if (data_len != 32) + return rrep; + + Sha256TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.sha256, data, data_len); + THashData *rdata = THashLookupFromHash(set->hash, &lookup); + if (rdata) { + Sha256TypeJson *found = rdata->data; + rrep.found = true; + rrep.json = found->json; + DatasetUnlockData(rdata); + return rrep; + } + return rrep; +} + /** * \brief see if \a data is part of the set * \param set dataset @@ -1359,6 +1974,29 @@ DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uin return rrep; } +DataJsonResultType DatasetLookupwJson(Dataset *set, const uint8_t *data, const uint32_t data_len) +{ + DataJsonResultType rrep = { .found = false, .json = { .value = 0 } }; + if (set == NULL) + return rrep; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatasetLookupStringwJson(set, data, data_len); + case DATASET_TYPE_MD5: + return DatasetLookupMd5wJson(set, data, data_len); + case DATASET_TYPE_SHA256: + return 
DatasetLookupSha256wJson(set, data, data_len); + case DATASET_TYPE_IPV4: + return DatasetLookupIPv4wJson(set, data, data_len); + case DATASET_TYPE_IPV6: + return DatasetLookupIPv6wJson(set, data, data_len); + default: + break; + } + return rrep; +} + /** * \retval 1 data was added to the hash * \retval 0 data was not added to the hash as it is already there @@ -1478,6 +2116,64 @@ static int DatasetAddIPv6wRep( return -1; } +static int DatasetAddIPv4wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len < 4) + return -2; + + IPv4TypeJson lookup = { .json = *json }; + memcpy(lookup.ipv4, data, 4); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +static int DatasetAddIPv6wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + IPv6TypeJson lookup = { .json = *json }; + memcpy(lookup.ipv6, data, 16); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + +/** + * \retval 1 data was added to the hash + * \retval 0 data was not added to the hash as it is already there + * \retval -1 failed to add data to the hash + */ +static int DatasetAddStringwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + StringTypeJson lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json }; + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1515,6 +2211,25 @@ static int DatasetAddMd5wRep( return -1; } +static int DatasetAddMd5wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + Md5TypeJson lookup = { .json = *json }; + memcpy(lookup.md5, data, 16); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 1 : 0; + } + return -1; +} + static int DatasetAddSha256wRep( Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep) { @@ -1534,6 +2249,25 @@ static int DatasetAddSha256wRep( return -1; } +static int DatasetAddSha256wJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 32) + return -2; + + Sha256TypeJson lookup = { .json = *json }; + memcpy(lookup.sha256, data, 32); + struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup); + if (res.data) { + DatasetUnlockData(res.data); + return res.is_new ? 
1 : 0; + } + return -1; +} + static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len) { if (set == NULL) @@ -1593,6 +2327,29 @@ static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data return -1; } +static int DatasetAddwJson( + Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json) +{ + if (set == NULL) + return -1; + + switch (set->type) { + case DATASET_TYPE_STRING: + return DatasetAddStringwJson(set, data, data_len, json); + case DATASET_TYPE_MD5: + return DatasetAddMd5wJson(set, data, data_len, json); + case DATASET_TYPE_SHA256: + return DatasetAddSha256wJson(set, data, data_len, json); + case DATASET_TYPE_IPV4: + return DatasetAddIPv4wJson(set, data, data_len, json); + case DATASET_TYPE_IPV6: + return DatasetAddIPv6wJson(set, data, data_len, json); + default: + break; + } + return -1; +} + typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len); static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString, @@ -1771,3 +2528,188 @@ int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len) } return -1; } + +typedef int (*DatajsonOpFunc)( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json); + +static int DatajsonOpSerialized(Dataset *set, const char *string, const char *json, + DatajsonOpFunc DatasetOpString, DatajsonOpFunc DatasetOpMd5, DatajsonOpFunc DatasetOpSha256, + DatajsonOpFunc DatasetOpIPv4, DatajsonOpFunc DatasetOpIPv6) +{ + int ret; + + if (set == NULL) + return -1; + if (strlen(string) == 0) + return -1; + + DataJsonType jvalue = { .value = NULL, .len = 0 }; + if (json) { + if (ParseJsonLine(json, strlen(json), &jvalue) < 0) { + SCLogNotice("bad json value for dataset %s/%s", set->name, set->load); + return -1; + } + } + + switch (set->type) { + case DATASET_TYPE_STRING: { + uint32_t decoded_size = Base64DecodeBufferSize(strlen(string)); + uint8_t 
decoded[decoded_size]; + uint32_t num_decoded = Base64Decode( + (const uint8_t *)string, strlen(string), Base64ModeStrict, decoded); + if (num_decoded == 0) + goto operror; + ret = DatasetOpString(set, decoded, num_decoded, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_MD5: { + if (strlen(string) != 32) + goto operror; + uint8_t hash[16]; + if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0) + goto operror; + ret = DatasetOpMd5(set, hash, 16, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_SHA256: { + if (strlen(string) != 64) + goto operror; + uint8_t hash[32]; + if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0) + goto operror; + ret = DatasetOpSha256(set, hash, 32, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV4: { + struct in_addr in; + if (inet_pton(AF_INET, string, &in) != 1) + goto operror; + ret = DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + case DATASET_TYPE_IPV6: { + struct in6_addr in6; + if (ParseIpv6String(set, string, &in6) != 0) { + SCLogError("Dataset failed to import %s as IPv6", string); + goto operror; + } + ret = DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16, &jvalue); + if (ret <= 0) { + SCFree(jvalue.value); + } + return ret; + } + } + return -1; +operror: + SCFree(jvalue.value); + return -2; +} + +/** \brief add serialized data to json set + * \retval int 1 added + * \retval int 0 already in hash + * \retval int -1 API error (not added) + * \retval int -2 DATA error + */ +int DatajsonAddSerialized(Dataset *set, const char *value, const char *json) +{ + return DatajsonOpSerialized(set, value, json, DatasetAddStringwJson, DatasetAddMd5wJson, + DatasetAddSha256wJson, DatasetAddIPv4wJson, DatasetAddIPv6wJson); +} + +/** + * \retval 1 data was removed from the hash + * \retval 0 data not removed 
(busy) + * \retval -1 data not found + */ +static int DatajsonRemoveString( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + StringTypeJson lookup = { + .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0 + }; + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveIPv4( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 4) + return -2; + + IPv4TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv4, data, 4); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveIPv6( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + IPv6TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.ipv6, data, 16); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveMd5( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 16) + return -2; + + Md5TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.md5, data, 16); + return THashRemoveFromHash(set->hash, &lookup); +} + +static int DatajsonRemoveSha256( + Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json) +{ + if (set == NULL) + return -1; + + if (data_len != 32) + return -2; + + Sha256TypeJson lookup = { .json.value = NULL, .json.len = 0 }; + memcpy(lookup.sha256, data, 32); + return THashRemoveFromHash(set->hash, &lookup); +} + +/** \brief remove serialized data from set + * \retval int 1 removed + * \retval int 0 found but busy (not removed) + * \retval int -1 API error (not removed) + * \retval int -2 DATA error */ +int DatajsonRemoveSerialized(Dataset *set, const char 
*string) +{ + return DatajsonOpSerialized(set, string, NULL, DatajsonRemoveString, DatajsonRemoveMd5, + DatajsonRemoveSha256, DatajsonRemoveIPv4, DatajsonRemoveIPv6); +} diff --git a/src/datasets.h b/src/datasets.h index 86bfed02b22f..660eb4e5ecad 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -20,6 +20,7 @@ #include "util-thash.h" #include "datasets-reputation.h" +#include "datasets-json.h" int DatasetsInit(void); void DatasetsDestroy(void); @@ -60,9 +61,15 @@ int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len); int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len); DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep); +DataJsonResultType DatasetLookupwJson(Dataset *set, const uint8_t *data, const uint32_t data_len); +Dataset *DatasetJsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, + uint32_t hashsize); int DatasetAddSerialized(Dataset *set, const char *string); int DatasetRemoveSerialized(Dataset *set, const char *string); int DatasetLookupSerialized(Dataset *set, const char *string); +int DatajsonAddSerialized(Dataset *set, const char *string, const char *json); +int DatajsonRemoveSerialized(Dataset *set, const char *string); + #endif /* SURICATA_DATASETS_H */ diff --git a/src/decode.c b/src/decode.c index 18b7ffa852b8..8ffb7508367e 100644 --- a/src/decode.c +++ b/src/decode.c @@ -145,9 +145,38 @@ PacketAlert *PacketAlertCreate(void) return pa_array; } +void PacketAlertRecycle(PacketAlert *pa_array) +{ + if (pa_array != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + if (pa_array[i].json_info.next != NULL) { + struct JsonInfoList *current_json = pa_array[i].json_info.next; + while (current_json) { + struct JsonInfoList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + pa_array[i].json_info.json_string = NULL; + pa_array[i].json_info.next = NULL; + } + } 
+} + void PacketAlertFree(PacketAlert *pa) { if (pa != NULL) { + for (int i = 0; i < packet_alert_max; i++) { + /* first item is not allocated so start at second one */ + if (pa[i].json_info.next != NULL) { + struct JsonInfoList *current_json = pa[i].json_info.next; + while (current_json) { + struct JsonInfoList *next_json = current_json->next; + SCFree(current_json); + current_json = next_json; + } + } + } SCFree(pa); } } diff --git a/src/decode.h b/src/decode.h index 1b299864a7c0..dea0d4f1c4cd 100644 --- a/src/decode.h +++ b/src/decode.h @@ -234,6 +234,11 @@ typedef uint16_t Port; #define PKT_IS_TOSERVER(p) (((p)->flowflags & FLOW_PKT_TOSERVER)) #define PKT_IS_TOCLIENT(p) (((p)->flowflags & FLOW_PKT_TOCLIENT)) +struct JsonInfoList { + char *json_string; + struct JsonInfoList *next; +}; + /* structure to store the sids/gids/etc the detection engine * found in this packet */ typedef struct PacketAlert_ { @@ -243,6 +248,7 @@ typedef struct PacketAlert_ { const struct Signature_ *s; uint64_t tx_id; /* Used for sorting */ int64_t frame_id; + struct JsonInfoList json_info; } PacketAlert; /* flag to indicate the rule action (drop/pass) needs to be applied to the flow */ @@ -274,6 +280,7 @@ typedef struct PacketAlerts_ { } PacketAlerts; PacketAlert *PacketAlertCreate(void); +void PacketAlertRecycle(PacketAlert *pa_array); void PacketAlertFree(PacketAlert *pa); diff --git a/src/detect-datajson.c b/src/detect-datajson.c new file mode 100644 index 000000000000..1b0687bb2247 --- /dev/null +++ b/src/detect-datajson.c @@ -0,0 +1,369 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Eric Leblond + * + * Based on detect-dataset.c by Victor Julien + * + * Implements the datajson keyword + */ + +#include "suricata-common.h" +#include "detect.h" +#include "datasets.h" +#include "datasets-json.h" +#include "detect-datajson.h" + +#include "detect-parse.h" +#include "detect-engine.h" + +#include "util-debug.h" +#include "util-misc.h" +#include "util-path.h" + +static int DetectDatajsonSetup(DetectEngineCtx *, Signature *, const char *); +void DetectDatajsonFree(DetectEngineCtx *, void *); + +void DetectDatajsonRegister(void) +{ + sigmatch_table[DETECT_DATAJSON].name = "datajson"; + sigmatch_table[DETECT_DATAJSON].desc = + "match sticky buffer against datasets with json extra data (experimental)"; + sigmatch_table[DETECT_DATAJSON].url = "/rules/dataset-keywords.html#datajson"; + sigmatch_table[DETECT_DATAJSON].Setup = DetectDatajsonSetup; + sigmatch_table[DETECT_DATAJSON].Free = DetectDatajsonFree; +} + +/* + 1 match + 0 no match + -1 can't match + */ +int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatajsonData *sd, + const uint8_t *data, const uint32_t data_len) +{ + if (data == NULL || data_len == 0) + return 0; + + switch (sd->cmd) { + case DETECT_DATAJSON_CMD_ISSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatasetLookupwJson(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (!r.found) + return 0; + if (r.json.len > 0) { + if ((det_ctx->json_content_len < SIG_JSON_CONTENT_ARRAY_LEN) && + (r.json.len + strlen(sd->json_key) + 3 < SIG_JSON_CONTENT_ITEM_LEN)) { + 
snprintf(det_ctx->json_content[det_ctx->json_content_len].json_content, + SIG_JSON_CONTENT_ITEM_LEN, "\"%s\":%s", sd->json_key, r.json.value); + det_ctx->json_content[det_ctx->json_content_len].id = sd->id; + det_ctx->json_content_len++; + } + } + return 1; + } + case DETECT_DATAJSON_CMD_ISNOTSET: { + // PrintRawDataFp(stdout, data, data_len); + DataJsonResultType r = DatasetLookupwJson(sd->set, data, data_len); + SCLogDebug("r found: %d, len: %zu", r.found, r.json.len); + if (r.found) + return 0; + return 1; + } + default: + abort(); + } + return 0; +} + +static int DetectDatajsonParse(const char *str, char *cmd, int cmd_len, char *name, int name_len, + enum DatasetTypes *type, char *load, size_t load_size, uint64_t *memcap, uint32_t *hashsize, + char *json_key, size_t json_key_size) +{ + bool cmd_set = false; + bool name_set = false; + bool load_set = false; + + char copy[strlen(str) + 1]; + strlcpy(copy, str, sizeof(copy)); + char *xsaveptr = NULL; + char *key = strtok_r(copy, ",", &xsaveptr); + while (key != NULL) { + while (*key != '\0' && isblank(*key)) { + key++; + } + char *val = strchr(key, ' '); + if (val != NULL) { + *val++ = '\0'; + while (*val != '\0' && isblank(*val)) { + val++; + SCLogDebug("cmd %s val %s", key, val); + } + } else { + SCLogDebug("cmd %s", key); + } + + if (strlen(key) == 0) { + goto next; + } + + if (!cmd_set) { + if (val && strlen(val) != 0) { + return -1; + } + strlcpy(cmd, key, cmd_len); + cmd_set = true; + } else if (!name_set) { + if (val && strlen(val) != 0) { + return -1; + } + strlcpy(name, key, name_len); + name_set = true; + } else { + if (val == NULL) { + return -1; + } + + if (strcmp(key, "type") == 0) { + SCLogDebug("type %s", val); + + if (strcmp(val, "md5") == 0) { + *type = DATASET_TYPE_MD5; + } else if (strcmp(val, "sha256") == 0) { + *type = DATASET_TYPE_SHA256; + } else if (strcmp(val, "string") == 0) { + *type = DATASET_TYPE_STRING; + } else if (strcmp(val, "ipv4") == 0) { + *type = DATASET_TYPE_IPV4; + } else if 
(strcmp(val, "ipv6") == 0) { + *type = DATASET_TYPE_IPV6; + } else if (strcmp(val, "ip") == 0) { + *type = DATASET_TYPE_IPV6; + } else { + SCLogError("bad type %s", val); + return -1; + } + + } else if (strcmp(key, "load") == 0) { + if (load_set) { + SCLogWarning("'load' can only appear once"); + return -1; + } + SCLogDebug("load %s", val); + strlcpy(load, val, load_size); + load_set = true; + } else if (strcmp(key, "key") == 0) { + if (strlen(key) > json_key_size) { + SCLogWarning("'key' value too long (limit is %" PRIu64 ")", json_key_size); + return -1; + } + strlcpy(json_key, val, json_key_size); + load_set = true; + } + + if (strcmp(key, "memcap") == 0) { + if (ParseSizeStringU64(val, memcap) < 0) { + SCLogWarning("invalid value for memcap: %s," + " resetting to default", + val); + *memcap = 0; + } + } + if (strcmp(key, "hashsize") == 0) { + if (ParseSizeStringU32(val, hashsize) < 0) { + SCLogWarning("invalid value for hashsize: %s," + " resetting to default", + val); + *hashsize = 0; + } + } + } + + SCLogDebug("key: %s, value: %s", key, val); + + next: + key = strtok_r(NULL, ",", &xsaveptr); + } + + /* Trim trailing whitespace. */ + while (strlen(name) > 0 && isblank(name[strlen(name) - 1])) { + name[strlen(name) - 1] = '\0'; + } + + /* Validate name, spaces are not allowed. 
*/ + for (size_t i = 0; i < strlen(name); i++) { + if (isblank(name[i])) { + SCLogError("spaces not allowed in dataset names"); + return 0; + } + } + + return 1; +} + +/** \brief wrapper around dirname that runs it on a local copy, leaving the input untouched */ +static void GetDirName(const char *in, char *out, size_t outs) +{ + if (strlen(in) == 0) { + return; + } + + size_t size = strlen(in) + 1; + char tmp[size]; + strlcpy(tmp, in, size); + + char *dir = dirname(tmp); + BUG_ON(dir == NULL); + strlcpy(out, dir, outs); + return; +} + +static int SetupLoadPath(const DetectEngineCtx *de_ctx, char *load, size_t load_size) +{ + SCLogDebug("load %s", load); + + if (PathIsAbsolute(load)) { + return 0; + } + + bool done = false; +#ifdef HAVE_LIBGEN_H + BUG_ON(de_ctx->rule_file == NULL); + + char dir[PATH_MAX] = ""; + GetDirName(de_ctx->rule_file, dir, sizeof(dir)); + + SCLogDebug("rule_file %s dir %s", de_ctx->rule_file, dir); + char path[PATH_MAX]; + if (snprintf(path, sizeof(path), "%s/%s", dir, load) >= (int)sizeof(path)) // TODO windows path + return -1; + + if (SCPathExists(path)) { + done = true; + strlcpy(load, path, load_size); + SCLogDebug("using path '%s' (HAVE_LIBGEN_H)", load); + } +#endif + if (!done) { + char *loadp = DetectLoadCompleteSigPath(de_ctx, load); + if (loadp == NULL) { + return -1; + } + SCLogDebug("loadp %s", loadp); + + if (SCPathExists(loadp)) { + strlcpy(load, loadp, load_size); + SCLogDebug("using path '%s' (non-HAVE_LIBGEN_H)", load); + } + SCFree(loadp); + } + return 0; +} + +int DetectDatajsonSetup(DetectEngineCtx *de_ctx, Signature *s, const char *rawstr) +{ + DetectDatajsonData *cd = NULL; + SigMatch *sm = NULL; + uint8_t cmd = 0; + uint64_t memcap = 0; + uint32_t hashsize = 0; + char cmd_str[16] = "", name[DATASET_NAME_MAX_LEN + 1] = ""; + enum DatasetTypes type = DATASET_TYPE_NOTSET; + char load[PATH_MAX] = ""; + char json_key[SIG_JSON_CONTENT_KEY_LEN] = ""; + size_t json_key_size = SIG_JSON_CONTENT_KEY_LEN; + + if (DetectBufferGetActiveList(de_ctx, s) ==
-1) { + SCLogError("datajson is only supported for sticky buffers"); + SCReturnInt(-1); + } + + int list = s->init_data->list; + if (list == DETECT_SM_LIST_NOTSET) { + SCLogError("datajson is only supported for sticky buffers"); + SCReturnInt(-1); + } + + if (!DetectDatajsonParse(rawstr, cmd_str, sizeof(cmd_str), name, sizeof(name), &type, load, + sizeof(load), &memcap, &hashsize, json_key, json_key_size)) { + return -1; + } + + if (strcmp(cmd_str, "isset") == 0) { + cmd = DETECT_DATAJSON_CMD_ISSET; + } else if (strcmp(cmd_str, "isnotset") == 0) { + cmd = DETECT_DATAJSON_CMD_ISNOTSET; + } else { + SCLogError("datajson action \"%s\" is not supported.", cmd_str); + return -1; + } + + if (strlen(load) != 0) { + if (SetupLoadPath(de_ctx, load, sizeof(load)) != 0) + return -1; + } + + if (strlen(json_key) == 0) { + SCLogError("datajson needs a key parameter"); + return -1; + } + + SCLogDebug("name '%s' load '%s'", name, load); + Dataset *set = DatasetJsonGet(name, type, load, memcap, hashsize); + if (set == NULL) { + SCLogError("failed to set up datajson '%s'.", name); + return -1; + } + if (set->hash && SC_ATOMIC_GET(set->hash->memcap_reached)) { + SCLogError("datajson too large for set memcap"); + return -1; + } + + cd = SCCalloc(1, sizeof(DetectDatajsonData)); + if (unlikely(cd == NULL)) + goto error; + + cd->set = set; + cd->cmd = cmd; + strlcpy(cd->json_key, json_key, json_key_size); + cd->id = s; + + SCLogDebug("cmd %s, name %s", cmd_str, strlen(name) ? 
name : "(none)"); + + SigMatchAppendSMToList(de_ctx, s, DETECT_DATAJSON, (SigMatchCtx *)cd, list); + return 0; + +error: + if (cd != NULL) + SCFree(cd); + if (sm != NULL) + SCFree(sm); + return -1; +} + +void DetectDatajsonFree(DetectEngineCtx *de_ctx, void *ptr) +{ + DetectDatajsonData *fd = (DetectDatajsonData *)ptr; + if (fd == NULL) + return; + + SCFree(fd); +} diff --git a/src/detect-datajson.h b/src/detect-datajson.h new file mode 100644 index 000000000000..ae63cab28dba --- /dev/null +++ b/src/detect-datajson.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */
+
+/**
+ * \file
+ *
+ * \author Eric Leblond
+ */
+
+#ifndef __DETECT_DATAJSON_H__
+#define __DETECT_DATAJSON_H__
+
+#include "datasets.h"
+
+#define DETECT_DATAJSON_CMD_ISSET    1 /**< "isset" command of the keyword */
+#define DETECT_DATAJSON_CMD_ISNOTSET 2 /**< "isnotset" command of the keyword */
+
+typedef struct DetectDatajsonData_ { /* context of one datajson keyword instance */
+    Dataset *set;      /**< dataset the keyword is bound to */
+    uint8_t cmd;       /**< DETECT_DATAJSON_CMD_ISSET or DETECT_DATAJSON_CMD_ISNOTSET */
+    DataJsonType json; /* NOTE(review): left zeroed by the setup function, confirm its use */
+    char json_key[SIG_JSON_CONTENT_KEY_LEN]; /**< copy of the "key" option of the rule */
+    void *id;          /**< signature owning the keyword, set by the setup function */
+} DetectDatajsonData;
+
+int DetectDatajsonBufferMatch(DetectEngineThreadCtx *det_ctx, const DetectDatajsonData *sd,
+        const uint8_t *data, const uint32_t data_len);
+
+/* prototypes */
+void DetectDatajsonRegister(void);
+
+#endif /* __DETECT_DATAJSON_H__ */
diff --git a/src/detect-dataset.c b/src/detect-dataset.c
index ae23925f2c11..f702b7b91271 100644
--- a/src/detect-dataset.c
+++ b/src/detect-dataset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2018-2020 Open Information Security Foundation
+/* Copyright (C) 2018-2024 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
diff --git a/src/detect-engine-alert.c b/src/detect-engine-alert.c
index 01452ecf89ae..3d2a132f9c1f 100644
--- a/src/detect-engine-alert.c
+++ b/src/detect-engine-alert.c
@@ -274,6 +274,29 @@ static inline PacketAlert PacketAlertSet(
     /* Set tx_id if the frame has it */
     pa.tx_id = tx_id;
     pa.frame_id = (alert_flags & PACKET_ALERT_FLAG_FRAME) ? det_ctx->frame_id : 0;
+    pa.json_info.json_string = NULL;
+    pa.json_info.next = NULL;
+    if (det_ctx->json_content_len) {
+        /* We have some JSON attached in the current detection so let's try
+           to see if some need to be used for current signature.
*/ + struct JsonInfoList *current_json = &pa.json_info; + for (size_t i = 0; i < det_ctx->json_content_len; i++) { + if (s == det_ctx->json_content[i].id) { + if (current_json->json_string != NULL) { + struct JsonInfoList *next_json = SCCalloc(1, sizeof(struct JsonInfoList)); + if (next_json) { + current_json->next = next_json; + current_json = next_json; + current_json->next = NULL; + } else { + /* Allocation error, let's return now */ + return pa; + } + } + current_json->json_string = det_ctx->json_content[i].json_content; + } + } + } return pa; } diff --git a/src/detect-engine-content-inspection.c b/src/detect-engine-content-inspection.c index e43e693b2151..ec3834e1fdc5 100644 --- a/src/detect-engine-content-inspection.c +++ b/src/detect-engine-content-inspection.c @@ -52,6 +52,7 @@ #include "detect-base64-data.h" #include "detect-dataset.h" #include "detect-datarep.h" +#include "detect-datajson.h" #include "util-spm.h" #include "util-debug.h" @@ -638,6 +639,16 @@ static int DetectEngineContentInspectionInternal(DetectEngineThreadCtx *det_ctx, } goto no_match_discontinue; + } else if (smd->type == DETECT_DATAJSON) { + + // PrintRawDataFp(stdout, buffer, buffer_len); + const DetectDatajsonData *sd = (const DetectDatajsonData *)smd->ctx; + int r = DetectDatajsonBufferMatch(det_ctx, sd, buffer, buffer_len); // TODO buffer offset? 
+        if (r == 1) {
+            goto match;
+        }
+        goto no_match_discontinue;
+
     } else if (smd->type == DETECT_AL_URILEN) {
         SCLogDebug("inspecting uri len");
 
diff --git a/src/detect-engine-register.c b/src/detect-engine-register.c
index 9bddf0fd8437..789e5244608f 100644
--- a/src/detect-engine-register.c
+++ b/src/detect-engine-register.c
@@ -140,6 +140,7 @@
 #include "detect-filesize.h"
 #include "detect-dataset.h"
 #include "detect-datarep.h"
+#include "detect-datajson.h"
 #include "detect-dsize.h"
 #include "detect-flowvar.h"
 #include "detect-flowint.h"
@@ -595,6 +596,7 @@ void SigTableSetup(void)
     DetectDsizeRegister();
     DetectDatasetRegister();
     DetectDatarepRegister();
+    DetectDatajsonRegister();
     DetectFlowvarRegister();
     DetectFlowintRegister();
     DetectPktvarRegister();
diff --git a/src/detect-engine-register.h b/src/detect-engine-register.h
index b7a029998555..6e81e8a8d8cd 100644
--- a/src/detect-engine-register.h
+++ b/src/detect-engine-register.h
@@ -88,6 +88,7 @@ enum DetectKeywordId {
     DETECT_BYTE_EXTRACT,
     DETECT_DATASET,
     DETECT_DATAREP,
+    DETECT_DATAJSON,
     DETECT_BASE64_DECODE,
     DETECT_BASE64_DATA,
     DETECT_BSIZE,
diff --git a/src/detect-pcre.c b/src/detect-pcre.c
index fdbd8fd87ad4..3176b97c9c74 100644
--- a/src/detect-pcre.c
+++ b/src/detect-pcre.c
@@ -156,6 +156,65 @@ void DetectPcreRegister (void)
 #endif
 }
 
+/**
+ * \brief Store a PCRE capture as JSON content for the alert of signature \a s.
+ *
+ * The capture is serialized as a "key":"value" pair, the key being the
+ * alert variable name registered under \a idx, and stored in the next free
+ * slot of det_ctx->json_content. It is silently dropped when the name is
+ * unknown, all slots are in use or the result does not fit in a slot.
+ *
+ * \param det_ctx detection thread context receiving the JSON content
+ * \param s signature the capture belongs to, stored as id of the slot
+ * \param idx variable name id of type VAR_TYPE_ALERT_VAR
+ * \param str_ptr captured bytes; always released with SCFree() before returning
+ * \param capture_len length of the captured data in bytes
+ */
+static void DetectAlertStoreMatch(DetectEngineThreadCtx *det_ctx, const Signature *s, uint32_t idx,
+        uint8_t *str_ptr, uint16_t capture_len)
+{
+    JsonBuilder *js = NULL;
+    uint32_t js_len = 0;
+    SigJsonContent *slot = NULL;
+
+    /* We need the key */
+    const char *json_key = VarNameStoreLookupById(idx, VAR_TYPE_ALERT_VAR);
+    if (json_key == NULL)
+        goto end;
+
+    SCLogDebug("json key: %s", json_key);
+    /* Cheap pre-check: no free slot left or the capture can't possibly fit */
+    if ((det_ctx->json_content_len >= SIG_JSON_CONTENT_ARRAY_LEN) ||
+            (capture_len + strlen(json_key) + 5 >= SIG_JSON_CONTENT_ITEM_LEN))
+        goto end;
+
+    /* Setup the data */
+    js = jb_new_object();
+    if (unlikely(js == NULL))
+        goto end;
+    jb_set_string_from_bytes(js, json_key, str_ptr, capture_len);
+    js_len = jb_len(js);
+    /* the pre-check is only an estimate, verify the real serialized length */
+    if (js_len > SIG_JSON_CONTENT_ITEM_LEN) {
+        SCLogDebug("Captured length is too long for JSON.");
+        goto end;
+    }
+
+    slot = &det_ctx->json_content[det_ctx->json_content_len];
+    /* Copy js but skip the starting curly bracket to just get the inner data */
+    memcpy(slot->json_content, jb_ptr(js) + 1, js_len - 1);
+    /* end the string as we have used memcpy */
+    slot->json_content[js_len - 1] = 0;
+    slot->id = (void *)s;
+    det_ctx->json_content_len++;
+
+end:
+    /* single exit so that the builder and the capture are released on all paths */
+    if (js != NULL)
+        jb_free(js);
+    SCFree(str_ptr);
+}
+
 /**
  * \brief Match a regex on a single payload.
  *
@@ -280,6 +339,11 @@ int DetectPcrePayloadMatch(DetectEngineThreadCtx *det_ctx, const Signature *s,
                 } else if (pe->captypes[x] == VAR_TYPE_FLOW_VAR && f != NULL) {
                     (void)DetectVarStoreMatch(det_ctx, pe->capids[x], (uint8_t *)str_ptr,
                             (uint16_t)capture_len, DETECT_VAR_TYPE_FLOW_POSTMATCH);
+
+                } else if (pe->captypes[x] == VAR_TYPE_ALERT_VAR) {
+                    (void)DetectAlertStoreMatch(det_ctx, s, pe->capids[x], (uint8_t *)str_ptr,
+                            (uint16_t)capture_len);
+
                 } else {
                     BUG_ON(1); // Impossible captype
                     SCFree(str_ptr);
@@ -363,20 +427,32 @@ static DetectPcreData *DetectPcreParse (DetectEngineCtx *de_ctx,
     int cut_capture = 0;
     char *fcap = strstr(regexstr, "flow:");
     char *pcap = strstr(regexstr, "pkt:");
+    char *acap = strstr(regexstr, "alert:");
     /* take the size of the whole input as buffer size for the regex we will
      * extract below. Add 1 to please Coverity's alloc_strlen test. */
     size_t slen = strlen(regexstr) + 1;
-    if (fcap || pcap) {
+    if (fcap || pcap || acap) {
         SCLogDebug("regexstr %s", regexstr);
 
-        if (fcap && !pcap)
+        bool a_set = false;
+        cut_capture = 0;
+        if (fcap) {
+            a_set = true;
             cut_capture = fcap - regexstr;
-        else if (pcap && !fcap)
-            cut_capture = pcap - regexstr;
-        else {
-            BUG_ON(pcap == NULL); // added to assist cppcheck
-            BUG_ON(fcap == NULL);
-            cut_capture = MIN((pcap - regexstr), (fcap - regexstr));
+        }
+        if (pcap) {
+            if (a_set)
+                cut_capture = MIN(cut_capture, (pcap - regexstr));
+            else {
+                cut_capture = pcap - regexstr;
+                a_set = true;
+            }
+        }
+        if (acap) {
+            if (a_set)
+                cut_capture = MIN(cut_capture, (acap - regexstr));
+            else
+                cut_capture = acap - regexstr;
         }
 
         SCLogDebug("cut_capture %d", cut_capture);
@@ -761,6 +837,12 @@ static int DetectPcreParseCapture(const char *regexstr, DetectEngineCtx *de_ctx,
                 SCLogDebug("id %u type %u", pd->capids[pd->idx], pd->captypes[pd->idx]);
                 pd->idx++;
 
+            } else if (strncmp(name_array[name_idx], "alert:", 6) == 0) {
+                pd->capids[pd->idx] =
+                        VarNameStoreRegister(name_array[name_idx] + 6, VAR_TYPE_ALERT_VAR);
+                pd->captypes[pd->idx] = VAR_TYPE_ALERT_VAR;
+                pd->idx++;
+
             } else {
                 SCLogError(" pkt/flow "
                            "var capture names must start with 'pkt:' or 'flow:'");
@@ -826,6 +908,10 @@ static int DetectPcreParseCapture(const char *regexstr, DetectEngineCtx *de_ctx,
             pd->capids[pd->idx] = VarNameStoreRegister((char *)capture_str, VAR_TYPE_FLOW_VAR);
             pd->captypes[pd->idx] = VAR_TYPE_FLOW_VAR;
             pd->idx++;
+        } else if (strcmp(type_str, "alert") == 0) {
+            pd->capids[pd->idx] = VarNameStoreRegister((char *)capture_str, VAR_TYPE_ALERT_VAR);
+            pd->captypes[pd->idx] = VAR_TYPE_ALERT_VAR;
+            pd->idx++;
         }
 
         //SCLogNotice("pd->capname %s", pd->capname);
diff --git a/src/detect.c b/src/detect.c
index ce107a52bc22..1c962dfa2ee2 100644
--- a/src/detect.c
+++ b/src/detect.c
@@ -869,6 +869,7 @@ static DetectRunScratchpad DetectRunSetup(
     det_ctx->base64_decoded_len = 0;
     det_ctx->raw_stream_progress = 0;
det_ctx->match_array_cnt = 0;
+    det_ctx->json_content_len = 0;
     det_ctx->alert_queue_size = 0;
     p->alerts.drop.action = 0;
 
diff --git a/src/detect.h b/src/detect.h
index 4e31c5fe0284..7133b9dec3d1 100644
--- a/src/detect.h
+++ b/src/detect.h
@@ -1092,6 +1092,16 @@ typedef struct RuleMatchCandidateTx {
     const Signature *s; /**< ptr to sig */
 } RuleMatchCandidateTx;
 
+#define SIG_JSON_CONTENT_ARRAY_LEN 16   /**< slots in DetectEngineThreadCtx_::json_content */
+#define SIG_JSON_CONTENT_ITEM_LEN  1024 /**< size in bytes of SigJsonContent::json_content */
+#define SIG_JSON_CONTENT_KEY_LEN   32   /**< size in bytes of a datajson key buffer */
+
+/** structure to store the json content with info on sig that triggered it */
+typedef struct SigJsonContent {
+    void *id; /**< signature that produced the content, compared when the alert is built */
+    char json_content[SIG_JSON_CONTENT_ITEM_LEN]; /**< NUL terminated JSON text */
+} SigJsonContent;
+
 /**
  * Detection engine thread data.
  */
@@ -1141,6 +1151,9 @@ typedef struct DetectEngineThreadCtx_ {
     int base64_decoded_len;
     int base64_decoded_len_max;
 
+    SigJsonContent json_content[SIG_JSON_CONTENT_ARRAY_LEN]; /**< JSON collected for alerts */
+    size_t json_content_len; /**< number of json_content slots in use */
+
     /* counter for the filestore array below -- up here for cache reasons. */
     uint16_t filestore_cnt;
 
diff --git a/src/output-json-alert.c b/src/output-json-alert.c
index c5c15d32b5a5..c8bb8acf82b3 100644
--- a/src/output-json-alert.c
+++ b/src/output-json-alert.c
@@ -253,6 +253,15 @@ void AlertJsonHeader(const Packet *p, const PacketAlert *pa, JsonBuilder *js, ui
         AlertJsonMetadata(pa, js);
     }
 
+    if (pa->json_info.json_string != NULL) {
+        jb_open_object(js, "extra");
+        const struct JsonInfoList *json_info = &pa->json_info;
+        while (json_info) {
+            jb_set_formatted(js, json_info->json_string);
+            json_info = json_info->next;
+        }
+        jb_close(js);
+    }
     if (flags & LOG_JSON_RULE) {
         jb_set_string(js, "rule", pa->s->sig_str);
     }
diff --git a/src/packet.c b/src/packet.c
index cb6dcf618380..b30bca044ac1 100644
--- a/src/packet.c
+++ b/src/packet.c
@@ -125,6 +125,7 @@ void PacketReinit(Packet *p)
     p->alerts.discarded = 0;
     p->alerts.suppressed = 0;
     p->alerts.drop.action = 0;
+    PacketAlertRecycle(p->alerts.alerts);
     p->pcap_cnt = 0;
     p->tunnel_rtv_cnt = 0;
     p->tunnel_tpr_cnt = 0;
diff --git
a/src/runmode-unix-socket.c b/src/runmode-unix-socket.c
index 3c390e99a6db..76b32c7d9dab 100644
--- a/src/runmode-unix-socket.c
+++ b/src/runmode-unix-socket.c
@@ -806,6 +806,146 @@ TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data)
     }
 }
 
+/**
+ * \brief Command to add data to a datajson
+ *
+ * Expects the string arguments "setname", "settype", "datavalue" and
+ * "datajson" in \a cmd.
+ *
+ * \param cmd the content of command Arguments as a json_t object
+ * \param answer the json_t object that has to be used to answer
+ * \param data unused
+ *
+ * \retval TM_ECODE_OK data added or already in the set
+ * \retval TM_ECODE_FAILED invalid argument, unknown set or failed addition
+ */
+TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data)
+{
+    /* 1 get dataset name */
+    json_t *narg = json_object_get(cmd, "setname");
+    if (!json_is_string(narg)) {
+        json_object_set_new(answer, "message", json_string("setname is not a string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *set_name = json_string_value(narg);
+
+    /* 2 get the data type */
+    json_t *targ = json_object_get(cmd, "settype");
+    if (!json_is_string(targ)) {
+        json_object_set_new(answer, "message", json_string("settype is not a string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *type = json_string_value(targ);
+
+    /* 3 get value */
+    json_t *varg = json_object_get(cmd, "datavalue");
+    if (!json_is_string(varg)) {
+        json_object_set_new(answer, "message", json_string("datavalue is not string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *value = json_string_value(varg);
+
+    /* 4 get json */
+    json_t *jarg = json_object_get(cmd, "datajson");
+    if (!json_is_string(jarg)) {
+        json_object_set_new(answer, "message", json_string("datajson is not string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *json = json_string_value(jarg);
+
+    SCLogDebug("datajson-add: %s type %s value %s json %s", set_name, type, value, json);
+
+    enum DatasetTypes t = DatasetGetTypeFromString(type);
+    if (t == DATASET_TYPE_NOTSET) {
+        json_object_set_new(answer, "message", json_string("unknown settype"));
+        return TM_ECODE_FAILED;
+    }
+
+    Dataset *set = DatasetFind(set_name, t);
+    if (set == NULL) {
+        json_object_set_new(answer, "message", json_string("set not found or wrong type"));
+        return TM_ECODE_FAILED;
+    }
+
+    int r = DatajsonAddSerialized(set, value, json);
+    if (r == 1) {
+        json_object_set_new(answer, "message", json_string("data added"));
+        return TM_ECODE_OK;
+    } else if (r == 0) {
+        json_object_set_new(answer, "message", json_string("data already in set"));
+        return TM_ECODE_OK;
+    } else {
+        json_object_set_new(answer, "message", json_string("failed to add data"));
+        return TM_ECODE_FAILED;
+    }
+}
+
+/**
+ * \brief Command to remove data from a datajson
+ *
+ * Expects the string arguments "setname", "settype" and "datavalue" in
+ * \a cmd.
+ *
+ * \param cmd the content of command Arguments as a json_t object
+ * \param answer the json_t object that has to be used to answer
+ * \param data unused
+ *
+ * \retval TM_ECODE_OK data removed or busy (see the answer message)
+ * \retval TM_ECODE_FAILED invalid argument, unknown set or failed removal
+ */
+TmEcode UnixSocketDatajsonRemove(json_t *cmd, json_t *answer, void *data)
+{
+    /* 1 get dataset name */
+    json_t *narg = json_object_get(cmd, "setname");
+    if (!json_is_string(narg)) {
+        json_object_set_new(answer, "message", json_string("setname is not a string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *set_name = json_string_value(narg);
+
+    /* 2 get the data type */
+    json_t *targ = json_object_get(cmd, "settype");
+    if (!json_is_string(targ)) {
+        json_object_set_new(answer, "message", json_string("settype is not a string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *type = json_string_value(targ);
+
+    /* 3 get value */
+    json_t *varg = json_object_get(cmd, "datavalue");
+    if (!json_is_string(varg)) {
+        json_object_set_new(answer, "message", json_string("datavalue is not string"));
+        return TM_ECODE_FAILED;
+    }
+    const char *value = json_string_value(varg);
+
+    SCLogDebug("datajson-remove: %s type %s value %s", set_name, type, value);
+
+    enum DatasetTypes t = DatasetGetTypeFromString(type);
+    if (t == DATASET_TYPE_NOTSET) {
+        json_object_set_new(answer, "message", json_string("unknown settype"));
+        return TM_ECODE_FAILED;
+    }
+
+    Dataset *set = DatasetFind(set_name, t);
+    if (set == NULL) {
+        json_object_set_new(answer, "message", json_string("set not found or wrong type"));
+        return TM_ECODE_FAILED;
+    }
+
+    int r = DatajsonRemoveSerialized(set, value);
+    if (r == 1) {
+        json_object_set_new(answer, "message", json_string("data removed"));
+        return TM_ECODE_OK;
+    } else if (r == 0) {
+        json_object_set_new(answer, "message", json_string("data is busy, try again"));
+        return TM_ECODE_OK;
+    } else {
+        json_object_set_new(answer, "message", json_string("failed to remove data"));
+        return TM_ECODE_FAILED;
+    }
+}
+
 static bool JsonU32Value(json_t *jarg, uint32_t *ret)
 {
     int64_t r = json_integer_value(jarg);
diff --git a/src/runmode-unix-socket.h b/src/runmode-unix-socket.h
index 8ea432f57dad..d1f90a6e8456 100644
--- a/src/runmode-unix-socket.h
+++ b/src/runmode-unix-socket.h
@@ -38,6 +38,9 @@ TmEcode UnixSocketDatasetRemove(json_t *cmd, json_t* answer, void *data);
 TmEcode UnixSocketDatasetDump(json_t *cmd, json_t *answer, void *data);
 TmEcode UnixSocketDatasetClear(json_t *cmd, json_t *answer, void *data);
 TmEcode UnixSocketDatasetLookup(json_t *cmd, json_t *answer, void *data);
+TmEcode UnixSocketDatajsonAdd(json_t *cmd, json_t *answer, void *data);
+TmEcode UnixSocketDatajsonRemove(json_t *cmd, json_t *answer, void *data);
+TmEcode UnixSocketDatajsonReplace(json_t *cmd, json_t *answer, void *data);
 TmEcode UnixSocketRegisterTenantHandler(json_t *cmd, json_t* answer, void *data);
 TmEcode UnixSocketUnregisterTenantHandler(json_t *cmd, json_t* answer, void *data);
 TmEcode UnixSocketRegisterTenant(json_t *cmd, json_t* answer, void *data);
diff --git a/src/unix-manager.c b/src/unix-manager.c
index 38baaac22011..ba0a04e8d0cb 100644
--- a/src/unix-manager.c
+++ b/src/unix-manager.c
@@ -1111,6 +1111,9 @@ int UnixManagerInit(void)
 
     UnixManagerRegisterCommand("dataset-add", UnixSocketDatasetAdd, &command, UNIX_CMD_TAKE_ARGS);
     UnixManagerRegisterCommand("dataset-remove", UnixSocketDatasetRemove, &command, UNIX_CMD_TAKE_ARGS);
+    UnixManagerRegisterCommand("datajson-add", UnixSocketDatajsonAdd, &command, UNIX_CMD_TAKE_ARGS);
+    UnixManagerRegisterCommand(
+            "datajson-remove", UnixSocketDatajsonRemove, &command, UNIX_CMD_TAKE_ARGS);
     UnixManagerRegisterCommand(
             "get-flow-stats-by-id",
UnixSocketGetFlowStatsById, &command, UNIX_CMD_TAKE_ARGS); UnixManagerRegisterCommand("dataset-dump", UnixSocketDatasetDump, NULL, 0); diff --git a/src/util-var.h b/src/util-var.h index 4732b46e479c..ea8d7233d1e8 100644 --- a/src/util-var.h +++ b/src/util-var.h @@ -44,6 +44,8 @@ enum VarTypes { VAR_TYPE_IPPAIR_BIT, VAR_TYPE_IPPAIR_INT, VAR_TYPE_IPPAIR_VAR, + + VAR_TYPE_ALERT_VAR, }; /** \todo see ticket #6855. The type field should be 16 bits. */