Skip to content

Commit

Permalink
feat: automatic selection of parser by file content (#2440)
Browse files Browse the repository at this point in the history
* feat: automatic selection of parser by file content

* chore: unittests

* chore: pylint

* chore: documentation
  • Loading branch information
StefanFl authored Jan 13, 2025
1 parent 7fce8b2 commit d5ecc42
Show file tree
Hide file tree
Showing 62 changed files with 585 additions and 1,421 deletions.
2 changes: 0 additions & 2 deletions backend/application/import_observations/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ class FileUploadObservationsByIdRequestSerializer(Serializer):
file = FileField(max_length=255)
product = IntegerField(validators=[MinValueValidator(0)])
branch = IntegerField(validators=[MinValueValidator(0)], required=False)
parser = IntegerField(validators=[MinValueValidator(0)])
service = CharField(max_length=255, required=False)
docker_image_name_tag = CharField(max_length=513, required=False)
endpoint_url = CharField(max_length=2048, required=False)
Expand All @@ -39,7 +38,6 @@ class FileUploadObservationsByNameRequestSerializer(Serializer):
file = FileField(max_length=255)
product_name = CharField(max_length=255)
branch_name = CharField(max_length=255, required=False)
parser_name = CharField(max_length=255)
service = CharField(max_length=255, required=False)
docker_image_name_tag = CharField(max_length=513, required=False)
endpoint_url = CharField(max_length=2048, required=False)
Expand Down
16 changes: 0 additions & 16 deletions backend/application/import_observations/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@
get_api_configuration_by_name,
get_api_configurations,
)
from application.import_observations.queries.parser import (
get_parser_by_id,
get_parser_by_name,
)
from application.import_observations.queries.vulnerability_check import (
get_vulnerability_checks,
)
Expand Down Expand Up @@ -220,11 +216,6 @@ def post(self, request): # pylint: disable=too-many-locals
f"Branch {branch_id} does not exist for product {product}"
)

parser_id = request_serializer.validated_data.get("parser")
parser = get_parser_by_id(parser_id)
if not parser:
raise ValidationError(f"Parser {parser_id} does not exist")

file = request_serializer.validated_data.get("file")
service = request_serializer.validated_data.get("service")
docker_image_name_tag = request_serializer.validated_data.get(
Expand All @@ -239,7 +230,6 @@ def post(self, request): # pylint: disable=too-many-locals
file_upload_parameters = FileUploadParameters(
product=product,
branch=branch,
parser=parser,
file=file,
service=service,
docker_image_name_tag=docker_image_name_tag,
Expand Down Expand Up @@ -308,11 +298,6 @@ def post(self, request): # pylint: disable=too-many-locals
if not branch:
branch = Branch.objects.create(product=product, name=branch_name)

parser_name = request_serializer.validated_data.get("parser_name")
parser = get_parser_by_name(parser_name)
if not parser:
raise ValidationError(f"Parser {parser_name} does not exist")

file = request_serializer.validated_data.get("file")
service = request_serializer.validated_data.get("service")
docker_image_name_tag = request_serializer.validated_data.get(
Expand All @@ -327,7 +312,6 @@ def post(self, request): # pylint: disable=too-many-locals
file_upload_parameters = FileUploadParameters(
product=product,
branch=branch,
parser=parser,
file=file,
service=service,
docker_image_name_tag=docker_image_name_tag,
Expand Down
3 changes: 3 additions & 0 deletions backend/application/import_observations/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class ParserError(Exception):
    """Exception that carries a human-readable ``message`` attribute.

    Args:
        message: Description of the problem; exposed as ``self.message`` and
            as the exception's string representation.
    """

    def __init__(self, message: str) -> None:
        # Forward the message to Exception explicitly. When it is passed by
        # keyword, BaseException.__new__ does not record it in ``args``, so
        # ``str(exc)`` would otherwise be empty.
        super().__init__(message)
        self.message = message
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def register_module(self, module_name: str) -> None:
):
_register_parser(module_name)
except Exception as exc:
print(exc)
raise CommandError(
format_log_message(message=f"Failed to load {module_name}")
) from exc
Expand Down
Original file line number Diff line number Diff line change
@@ -1,47 +1,31 @@
import csv
import io
import re
from json import dumps

from django.core.files.base import File

from application.core.models import Observation
from application.import_observations.parsers.base_parser import (
BaseFileParser,
BaseParser,
)
from application.import_observations.types import Parser_Type
from application.import_observations.types import Parser_Filetype, Parser_Type


class AzureDefenderParser(BaseParser, BaseFileParser):
@classmethod
def get_name(cls) -> str:
return "Azure Defender"

@classmethod
def get_filetype(cls) -> str:
return Parser_Filetype.FILETYPE_CSV

@classmethod
def get_type(cls) -> str:
return Parser_Type.TYPE_INFRASTRUCTURE

def check_format(self, file: File) -> tuple[bool, list[str], dict | list]:
if file.name and not file.name.endswith(".csv"):
return False, ["File is not CSV"], {}
try:
content = file.read()
if isinstance(content, bytes):
content = content.decode("utf-8")
reader = csv.DictReader(io.StringIO(content), delimiter=",", quotechar='"')
except Exception:
return False, ["File is not valid CSV"], {}

rows = []
for row in reader:
rows.append(row)

if rows:
if not rows[0].get("subscriptionName"):
return False, ["File is not an Azure Defender export"], {}

return True, [], rows
def check_format(self, data: list[dict]) -> bool:
    """Return True when ``data`` looks like an Azure Defender export.

    Only the first row is inspected: it must be a dict with non-empty
    ``subscriptionId`` and ``subscriptionName`` values. Input that is not a
    non-empty sequence of dicts is rejected with False instead of raising,
    matching the ``isinstance`` guards of the other file parsers.
    """
    if not isinstance(data, (list, tuple)) or not data:
        return False
    first_row = data[0]
    if not isinstance(first_row, dict):
        return False
    return bool(
        first_row.get("subscriptionId") and first_row.get("subscriptionName")
    )

def get_observations(self, data: list[dict]) -> list[Observation]:
observations = []
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import Any, Optional

from django.core.files.base import File

from application.core.models import Observation
from application.import_observations.models import Api_Configuration
from application.licenses.models import License_Component
Expand Down Expand Up @@ -43,5 +41,9 @@ def check_connection(


class BaseFileParser:
def check_format(self, file: File) -> tuple[bool, list[str], Any]:
@classmethod
def get_filetype(cls) -> str:
raise NotImplementedError("check_format() must be overridden")

def check_format(self, data: Any) -> bool:
    """Tell whether ``data`` is in this parser's format.

    The base implementation always raises; subclasses must override it.

    Raises:
        NotImplementedError: Always.
    """
    message = "check_format() must be overridden"
    raise NotImplementedError(message)
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from json import dumps, load
from typing import Optional

from django.core.files.base import File
from json import dumps
from typing import Any, Optional

from application.core.models import Observation
from application.core.types import Severity
from application.import_observations.parsers.base_parser import (
BaseFileParser,
BaseParser,
)
from application.import_observations.types import Parser_Type
from application.import_observations.types import Parser_Filetype, Parser_Type

# Recommended cipher suites, curves and signature algorithms according to German BSI as of 2023
TLS12_RECOMMENDED_CIPHERS = [
Expand Down Expand Up @@ -120,25 +118,24 @@ class CryptoLyzerParser(BaseParser, BaseFileParser):
def get_name(cls) -> str:
return "CryptoLyzer"

@classmethod
def get_filetype(cls) -> str:
return Parser_Filetype.FILETYPE_JSON

@classmethod
def get_type(cls) -> str:
return Parser_Type.TYPE_DAST # pylint: disable=duplicate-code

def check_format(self, file: File) -> tuple[bool, list[str], dict]:
try:
data = load(file)
except Exception:
return False, ["File is not valid JSON"], {}

def check_format(self, data: Any) -> bool:
if (
not data.get("target")
or not data.get("versions")
or not data.get("ciphers")
or not data.get("curves")
isinstance(data, dict)
and data.get("target")
and data.get("versions")
and data.get("ciphers")
and data.get("curves")
):
return False, ["File is not a valid CryptoLyzer format"], {}

return True, [], data
return True
return False

def get_observations(self, data: dict) -> list[Observation]:
observations = []
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from json import dumps, load
from json import dumps
from typing import Any, Optional

from django.core.files.base import File

from application.core.models import Observation
from application.core.types import Severity
from application.import_observations.parsers.base_parser import (
Expand All @@ -13,7 +11,7 @@
get_component_dependencies,
)
from application.import_observations.parsers.cyclone_dx.types import Component, Metadata
from application.import_observations.types import Parser_Type
from application.import_observations.types import Parser_Filetype, Parser_Type
from application.licenses.models import License_Component


Expand All @@ -26,21 +24,18 @@ def __init__(self):
def get_name(cls) -> str:
return "CycloneDX"

@classmethod
def get_filetype(cls) -> str:
return Parser_Filetype.FILETYPE_JSON

@classmethod
def get_type(cls) -> str:
return Parser_Type.TYPE_SCA

def check_format(self, file: File) -> tuple[bool, list[str], dict]:
try:
data = load(file)
except Exception:
return False, ["File is not valid JSON"], {}

bom_format = data.get("bomFormat")
if bom_format != "CycloneDX":
return False, ["File is not a CycloneDX SBOM"], {}

return True, [], data
def check_format(self, data: Any) -> bool:
    """Return True when ``data`` is a dict whose ``bomFormat`` is "CycloneDX"."""
    return isinstance(data, dict) and data.get("bomFormat") == "CycloneDX"

def get_observations(self, data: dict) -> list[Observation]:
self.components = self._get_components(data)
Expand Down
49 changes: 18 additions & 31 deletions backend/application/import_observations/parsers/drheader/parser.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from json import dumps, load

from django.core.files.base import File
from json import dumps
from typing import Any

from application.core.models import Observation
from application.core.types import Severity
from application.import_observations.parsers.base_parser import (
BaseFileParser,
BaseParser,
)
from application.import_observations.types import Parser_Type
from application.import_observations.types import Parser_Filetype, Parser_Type

REFERENCES = {
"Access-Control-Allow-Origin": [
Expand Down Expand Up @@ -93,37 +92,25 @@ class DrHEADerParser(BaseParser, BaseFileParser):
def get_name(cls) -> str:
return "DrHeader"

@classmethod
def get_filetype(cls) -> str:
return Parser_Filetype.FILETYPE_JSON

@classmethod
def get_type(cls) -> str:
return Parser_Type.TYPE_DAST

def check_format(self, file: File) -> tuple[bool, list[str], dict | list]:
try: # pylint: disable=duplicate-code
data = load(file)
except Exception:
return False, ["File is not valid JSON"], {}

if not isinstance(data, list):
return False, ["File is not a DrHeader format, data is not a list"], {}

if len(data) >= 1: # pylint: disable=duplicate-code
first_element = data[0]
if not isinstance(first_element, dict):
return (
False,
["File is not a DrHeader format, element is not a dictionary"],
{},
)
if not first_element.get("rule"):
return (
False,
[
"Data is not a DrHeader format, element doesn't have a rule entry"
],
{},
)

return True, [], data
def check_format(self, data: Any) -> bool:
    """Return True when ``data`` looks like DrHeader output.

    The data must be a non-empty list whose first element is a dict with
    truthy ``rule``, ``message`` and ``severity`` entries.
    """
    if not isinstance(data, list) or not data:
        return False
    head = data[0]
    if not isinstance(head, dict):
        return False
    # Keys are probed in the original order; all() stops at the first miss.
    return all(head.get(key) for key in ("rule", "message", "severity"))

def get_observations(self, data: list) -> list[Observation]:
observations = []
Expand Down
Loading

0 comments on commit d5ecc42

Please sign in to comment.