From 0e3997f0ad84cc667bbb1491093f99e8269d380f Mon Sep 17 00:00:00 2001 From: Florian Wilhelm Date: Thu, 15 Aug 2024 15:06:51 +0200 Subject: [PATCH 1/3] Bootstrap sql schema for cve --- .gitignore | 2 ++ jsonl-to-sql.py | 39 +++++++++++++++++++++++++++++++++++++++ sql-dump-to-jsonl.py | 18 ++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 jsonl-to-sql.py create mode 100644 sql-dump-to-jsonl.py diff --git a/.gitignore b/.gitignore index b8f6288..1760eb8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ glvd.sql +all-cve.jsonl +few-cve.jsonl diff --git a/jsonl-to-sql.py b/jsonl-to-sql.py new file mode 100644 index 0000000..6e5fda0 --- /dev/null +++ b/jsonl-to-sql.py @@ -0,0 +1,39 @@ +import json +import string +import html + +printable = set(string.printable) + +def attribute_value_or_null(obj, attr): + try: + val = obj[attr] + if isinstance(val, list): + # fixme: handle cases of array length != 1 + x = val[0] + return f"ARRAY[('{x['lang']}', '{x['value']}')::description]" + + if isinstance(val, str): + txt = html.escape(val, True) + txt = ''.join(filter(lambda x: x in printable, txt)) + return f"'{txt}'" + except: + return 'null' + + +def object_to_sql_insert(table_name, obj, columns): + values = [] + for c in columns: + values.append(attribute_value_or_null(obj, c)) + return f'INSERT INTO {table_name} ({",".join(columns)}) VALUES ({",".join(values)});' + + +with open('few-cve.jsonl', 'r') as f: + for line in f: + try: + cve = json.loads(line) + print(object_to_sql_insert('cve_item', cve, + ['id', 'sourceIdentifier', 'vulnStatus', 'published', 'lastModified', 'evaluatorComment', 'evaluatorSolution', 'cisaExploitAdd', 'cisaActionDue', 'cisaRequiredAction', 'cisaVulnerabilityName', 'cveTags', 'descriptions'] + ) + ) + except: + print('error') diff --git a/sql-dump-to-jsonl.py b/sql-dump-to-jsonl.py new file mode 100644 index 0000000..c39e8f4 --- /dev/null +++ b/sql-dump-to-jsonl.py @@ -0,0 +1,18 @@ +import json + +""" +Script to convert a 
database dump into a jsonl file that contains the results from the NIST api. +""" + +with open('glvd.sql', 'r') as f: + for line in f: + if 'sourceIdentifier' in line: + elements = line.split('\t') + cve_json = elements[2].replace('\\"', '\"') + try: + cve = json.loads(cve_json) + except: + print(cve_json) + continue + with open('all-cve.jsonl', 'a+') as ff: + ff.write(cve_json) From f424bbbe1200a57e10cb0bbaba7bfa62bdbf78b5 Mon Sep 17 00:00:00 2001 From: Florian Wilhelm Date: Thu, 15 Aug 2024 15:32:15 +0200 Subject: [PATCH 2/3] draft schema --- Containerfile | 4 ++-- schema.sql | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 schema.sql diff --git a/Containerfile b/Containerfile index 373a14b..be1a02f 100644 --- a/Containerfile +++ b/Containerfile @@ -1,3 +1,3 @@ -FROM ghcr.io/gardenlinux/glvd-postgres:edgenontls +FROM ghcr.io/gardenlinux/glvd-postgres:edgenotls -COPY glvd.sql /docker-entrypoint-initdb.d/glvd.sql +COPY schema.sql /docker-entrypoint-initdb.d/schema.sql diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..5b6b5ae --- /dev/null +++ b/schema.sql @@ -0,0 +1,55 @@ +CREATE TYPE description AS (lang TEXT, value TEXT); + +CREATE TYPE tag AS (sourceIdentifier TEXT, tags TEXT []); + +CREATE TYPE reference AS ("url" TEXT, source TEXT, tags TEXT []); + +CREATE TYPE cvssMetricV40 AS ( + source TEXT, + "type" TEXT, + vector text --fixme just for testing +); + +CREATE TYPE cvssMetricV31 AS ( + source TEXT, + "type" TEXT, + vector text --fixme just for testing +); + +CREATE TYPE cvssMetricV30 AS ( + source TEXT, + "type" TEXT, + vector text --fixme just for testing +); + +CREATE TYPE cvssMetricV2 AS ( + source TEXT, + "type" TEXT, + vector text --fixme just for testing +); + +CREATE TYPE metrics AS ( + v40 cvssMetricV40 [], + v31 cvssMetricV31 [], + v30 cvssMetricV30 [], + v2 cvssMetricV2 [] +); + +CREATE TABLE cve_item ( + id text NOT NULL, + sourceIdentifier TEXT, 
vulnStatus TEXT, + published TEXT, + lastModified TEXT, + evaluatorComment TEXT, + evaluatorSolution TEXT, + cisaExploitAdd TEXT, + cisaActionDue TEXT, + cisaRequiredAction TEXT, + cisaVulnerabilityName TEXT, + -- this seems to be empty always.. have not found a counter sample yet + cveTags tag [], + descriptions description [], + "references" reference [], + CONSTRAINT cve_item_pk PRIMARY KEY (id) +); From fc9fe4e0937755fdf2ecc212fe04325ef23d01f9 Mon Sep 17 00:00:00 2001 From: Florian Wilhelm Date: Fri, 16 Aug 2024 14:13:39 +0200 Subject: [PATCH 3/3] update schema --- jsonl-to-sql.py | 25 +++++++++++++++---------- schema.sql | 3 ++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/jsonl-to-sql.py b/jsonl-to-sql.py index 6e5fda0..64f6bab 100644 --- a/jsonl-to-sql.py +++ b/jsonl-to-sql.py @@ -9,8 +9,14 @@ def attribute_value_or_null(obj, attr): val = obj[attr] if isinstance(val, list): # fixme: handle cases of array length != 1 - x = val[0] - return f"ARRAY[('{x['lang']}', '{x['value']}')::description]" + element = val[0] + values = ', '.join('\'' + item + '\'' for item in element.values()) + return f"ARRAY[({values})::{attr}]" + + # metrics is a dict + # converting this seems to me more complex + # if isinstance(val, dict): + # return 'foo' if isinstance(val, str): txt = html.escape(val, True) @@ -29,11 +35,10 @@ def object_to_sql_insert(table_name, obj, columns): with open('few-cve.jsonl', 'r') as f: for line in f: - try: - cve = json.loads(line) - print(object_to_sql_insert('cve_item', cve, - ['id', 'sourceIdentifier', 'vulnStatus', 'published', 'lastModified', 'evaluatorComment', 'evaluatorSolution', 'cisaExploitAdd', 'cisaActionDue', 'cisaRequiredAction', 'cisaVulnerabilityName', 'cveTags', 'descriptions'] - ) - ) - except: - print('error') + cve = json.loads(line) + print(object_to_sql_insert('cve_item', cve, + ['id', 'sourceIdentifier', 'vulnStatus', 'published', 'lastModified', 'evaluatorComment', + 'evaluatorSolution', 'cisaExploitAdd', 
'cisaActionDue', 'cisaRequiredAction', 'cisaVulnerabilityName', + 'cveTags', 'descriptions', 'references'] + ) + ) diff --git a/schema.sql b/schema.sql index 5b6b5ae..52f48f3 100644 --- a/schema.sql +++ b/schema.sql @@ -2,7 +2,7 @@ CREATE TYPE description AS (lang TEXT, value TEXT); CREATE TYPE tag AS (sourceIdentifier TEXT, tags TEXT []); -CREATE TYPE reference AS ("url" TEXT, source TEXT, tags TEXT []); +CREATE TYPE reference AS ("url" TEXT, source TEXT); CREATE TYPE cvssMetricV40 AS ( source TEXT, @@ -51,5 +51,6 @@ CREATE TABLE cve_item ( cveTags tag [], descriptions description [], "references" reference [], + metrics metrics, CONSTRAINT cve_item_pk PRIMARY KEY (id) );