Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bootstrap sql schema for cve #7

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
glvd.sql
all-cve.jsonl
few-cve.jsonl
4 changes: 2 additions & 2 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FROM ghcr.io/gardenlinux/glvd-postgres:edgenontls
FROM ghcr.io/gardenlinux/glvd-postgres:edgenotls

COPY glvd.sql /docker-entrypoint-initdb.d/glvd.sql
COPY schema.sql /docker-entrypoint-initdb.d/schema.sql
44 changes: 44 additions & 0 deletions jsonl-to-sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json
import string
import html

# Characters allowed to survive in plain string values (ASCII printable set).
printable = set(string.printable)


def _quote_sql_literal(text):
    """Wrap *text* in single quotes, doubling any embedded single quote."""
    return "'" + text.replace("'", "''") + "'"


def attribute_value_or_null(obj, attr):
    """Render ``obj[attr]`` as a SQL value expression for an INSERT statement.

    Always returns a ``str`` so the result can be joined into a VALUES list:

    * A missing attribute, an empty list, or a value of an unsupported type
      (dict, number, ``None``, ...) renders as the literal ``null``.
    * A string is HTML-escaped (quotes included), stripped of every character
      outside ``string.printable`` and wrapped in single quotes.
    * A list renders as a one-element ``ARRAY[(...)::<attr>]`` built from the
      string values of its first element, in the element's own key order.
      If the first element is not a dict of strings, ``null`` is returned.

    NOTE(review): the cast target is the attribute name itself; confirm that
    a composite type with exactly that name exists in the target schema.
    """
    try:
        val = obj[attr]
    except (KeyError, IndexError, TypeError):
        # Attribute absent (or obj not subscriptable by attr): no value.
        return 'null'

    if isinstance(val, list):
        # fixme: handle cases of array length != 1
        if not val or not isinstance(val[0], dict):
            return 'null'
        items = list(val[0].values())
        # Nested non-string members cannot be rendered as a flat row.
        if not all(isinstance(item, str) for item in items):
            return 'null'
        values = ', '.join(_quote_sql_literal(item) for item in items)
        return f"ARRAY[({values})::{attr}]"

    if isinstance(val, str):
        # html.escape(..., True) also rewrites ' and ", so the text cannot
        # terminate the surrounding SQL string literal.
        txt = html.escape(val, True)
        txt = ''.join(ch for ch in txt if ch in printable)
        return f"'{txt}'"

    # Dict values (e.g. metrics) and other types are not converted yet;
    # emit null rather than returning None, which would break the caller's
    # string join.
    return 'null'


def object_to_sql_insert(table_name, obj, columns):
    """Build a single-row ``INSERT`` statement for *obj*.

    Each name in *columns* is used both as the column name and as the key
    looked up in *obj* via ``attribute_value_or_null``. Column names and
    rendered values are joined with commas, in the order given.
    """
    rendered = [attribute_value_or_null(obj, name) for name in columns]
    column_sql = ",".join(columns)
    value_sql = ",".join(rendered)
    return f'INSERT INTO {table_name} ({column_sql}) VALUES ({value_sql});'


# Columns written for every record; each name is also the key read from the
# parsed JSON object.
CVE_ITEM_COLUMNS = [
    'id', 'sourceIdentifier', 'vulnStatus', 'published', 'lastModified', 'evaluatorComment',
    'evaluatorSolution', 'cisaExploitAdd', 'cisaActionDue', 'cisaRequiredAction', 'cisaVulnerabilityName',
    'cveTags', 'descriptions', 'references',
]

# Read one JSON document per line and print the matching INSERT statement.
# The encoding is explicit so the result does not depend on the platform's
# default locale.
with open('few-cve.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        # Tolerate blank lines instead of failing in json.loads.
        if not line.strip():
            continue
        cve = json.loads(line)
        print(object_to_sql_insert('cve_item', cve, CVE_ITEM_COLUMNS))
56 changes: 56 additions & 0 deletions schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Composite types used as column types of the cve_item table.

-- Element type of cve_item.descriptions.
CREATE TYPE description AS (lang TEXT, value TEXT);

-- Element type of cve_item.cveTags; tags is itself a text array.
CREATE TYPE tag AS (sourceIdentifier TEXT, tags TEXT []);

-- Element type of cve_item."references".
CREATE TYPE reference AS ("url" TEXT, source TEXT);

-- The four cvssMetric* types below currently share one placeholder layout
-- (source, type, vector); see the fixme on each vector field.
CREATE TYPE cvssMetricV40 AS (
source TEXT,
"type" TEXT,
vector text --fixme just for testing
);

CREATE TYPE cvssMetricV31 AS (
source TEXT,
"type" TEXT,
vector text --fixme just for testing
);

CREATE TYPE cvssMetricV30 AS (
source TEXT,
"type" TEXT,
vector text --fixme just for testing
);

CREATE TYPE cvssMetricV2 AS (
source TEXT,
"type" TEXT,
vector text --fixme just for testing
);

-- Type of cve_item.metrics: one array per cvssMetric* type defined above.
CREATE TYPE metrics AS (
v40 cvssMetricV40 [],
v31 cvssMetricV31 [],
v30 cvssMetricV30 [],
v2 cvssMetricV2 []
);

-- One row per CVE record, keyed by id.
-- All scalar attributes, including published and lastModified, are stored
-- as TEXT; the structured attributes use the composite types declared
-- earlier in this file.
CREATE TABLE cve_item (
id text NOT NULL,
sourceIdentifier TEXT,
vulnStatus TEXT,
published TEXT,
lastModified TEXT,
evaluatorComment TEXT,
evaluatorSolution TEXT,
cisaExploitAdd TEXT,
cisaActionDue TEXT,
cisaRequiredAction TEXT,
cisaVulnerabilityName TEXT,
-- cveTags seems to be always empty; no counterexample has been found yet
cveTags tag [],
descriptions description [],
-- NOTE(review): the column name is quoted, presumably because REFERENCES is
-- an SQL keyword; statements naming this column need the same quoting.
"references" reference [],
metrics metrics,
CONSTRAINT cve_item_pk PRIMARY KEY (id)
);
18 changes: 18 additions & 0 deletions sql-dump-to-jsonl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import json

"""
Script to convert a database dump into a jsonl file that contains the results from the NIST API.
"""

# Scan the dump for rows that mention 'sourceIdentifier', take the third
# tab-separated field as the JSON document, and append every document that
# parses to all-cve.jsonl, one per line. Documents that fail to parse are
# printed to stdout and skipped.
with open('glvd.sql', 'r', encoding='utf-8') as f:
    # Open the output once (append mode, as before) instead of once per row.
    with open('all-cve.jsonl', 'a', encoding='utf-8') as ff:
        for line in f:
            if 'sourceIdentifier' not in line:
                continue
            elements = line.split('\t')
            if len(elements) < 3:
                # Not a data row with a third column; nothing to extract.
                continue
            # Turn every \" sequence into a bare quote, then drop the line
            # terminator that is present when this is the row's last field.
            cve_json = elements[2].replace('\\"', '\"').rstrip('\n')
            try:
                # Parsed only to validate; the raw text is what gets written.
                json.loads(cve_json)
            except json.JSONDecodeError:
                print(cve_json)
                continue
            # Always terminate the record so each document is on its own line.
            ff.write(cve_json + '\n')