From f3b91129b82d51c221042d29117e10e3f6ed3b91 Mon Sep 17 00:00:00 2001 From: Stefan Fleckenstein Date: Mon, 16 Dec 2024 08:40:58 +0100 Subject: [PATCH 1/3] feat: parser for semgrep json format --- .../parsers/semgrep/__init__.py | 0 .../parsers/semgrep/parser.py | 142 ++++++++++ .../parsers/semgrep/__init__.py | 0 .../semgrep/files/multiple_observations.json | 245 ++++++++++++++++++ .../parsers/semgrep/files/no_observation.json | 4 + .../parsers/semgrep/files/wrong_format.json | 3 + .../parsers/semgrep/test_parser.py | 86 ++++++ .../github_actions_and_templates.md | 40 +-- docs/integrations/supported_scanners.md | 2 +- 9 files changed, 501 insertions(+), 21 deletions(-) create mode 100644 backend/application/import_observations/parsers/semgrep/__init__.py create mode 100644 backend/application/import_observations/parsers/semgrep/parser.py create mode 100644 backend/unittests/import_observations/parsers/semgrep/__init__.py create mode 100644 backend/unittests/import_observations/parsers/semgrep/files/multiple_observations.json create mode 100644 backend/unittests/import_observations/parsers/semgrep/files/no_observation.json create mode 100644 backend/unittests/import_observations/parsers/semgrep/files/wrong_format.json create mode 100644 backend/unittests/import_observations/parsers/semgrep/test_parser.py diff --git a/backend/application/import_observations/parsers/semgrep/__init__.py b/backend/application/import_observations/parsers/semgrep/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/application/import_observations/parsers/semgrep/parser.py b/backend/application/import_observations/parsers/semgrep/parser.py new file mode 100644 index 000000000..38f2a007c --- /dev/null +++ b/backend/application/import_observations/parsers/semgrep/parser.py @@ -0,0 +1,142 @@ +from json import dumps, load + +from django.core.files.base import File + +from application.core.models import Observation +from application.core.types import Severity +from 
from application.import_observations.parsers.base_parser import (
    BaseFileParser,
    BaseParser,
)
from application.import_observations.types import Parser_Type

# Maps the lower-cased Semgrep severity to the severity stored on the observation.
SEVERITIES = {
    "error": Severity.SEVERITY_HIGH,
    "warning": Severity.SEVERITY_MEDIUM,
    "info": Severity.SEVERITY_LOW,
}


class SemgrepParser(BaseParser, BaseFileParser):
    """File parser for reports in the Semgrep JSON format.

    Only results whose rule metadata carries the category "security" are
    converted into observations; all other results are ignored.
    """

    @classmethod
    def get_name(cls) -> str:
        """Return the name of the parser, which is also the scanner name prefix."""
        return "Semgrep"

    @classmethod
    def get_type(cls) -> str:
        """Return the parser type (SAST)."""
        return Parser_Type.TYPE_SAST

    def check_format(self, file: File) -> tuple[bool, list[str], dict | list]:
        """Check whether ``file`` contains a Semgrep JSON report.

        Args:
            file: The uploaded file; it is read with ``json.load``.

        Returns:
            A tuple ``(is_valid, messages, data)``. For a valid report the
            message list is empty and ``data`` is the decoded JSON document.
            Otherwise ``messages`` holds one error text and ``data`` is an
            empty dictionary. Only the first entry of ``results`` is checked.
        """
        # Deliberately broad: every failure to read or decode the file is
        # reported as an invalid upload instead of aborting the import.
        try:  # pylint: disable=duplicate-code
            data = load(file)
        except Exception:
            return False, ["File is not valid JSON"], {}

        # Valid JSON can also be a list, string or number, which has no .get().
        if not isinstance(data, dict):
            return (
                False,
                ["File is not a Semgrep format, data is not a dictionary"],
                {},
            )

        if not data.get("version"):
            return False, ["File is not a Semgrep format, version is missing"], {}

        results = data.get("results")
        if not isinstance(results, list):
            return False, ["File is not a Semgrep format, data is not a list"], {}

        if results:  # pylint: disable=duplicate-code
            first_element = results[0]
            if not isinstance(first_element, dict):
                return (
                    False,
                    ["File is not a Semgrep format, element is not a dictionary"],
                    {},
                )
            if not first_element.get("check_id"):
                return (
                    False,
                    [
                        "Data is not a Semgrep format, element doesn't have a check_id entry"
                    ],
                    {},
                )

        return True, [], data

    def get_observations(self, data: dict) -> list[Observation]:
        """Convert the security results of a Semgrep report into observations.

        Args:
            data: The decoded report as returned by ``check_format``.

        Returns:
            One observation per result whose ``extra.metadata.category`` is
            "security" (compared case-insensitively). Results without a
            category, with a different category, or that are not dictionaries
            are skipped.
        """
        observations = []

        version = data.get("version")

        for result in data.get("results") or []:
            # check_format only validates the first element, so later
            # entries still have to be guarded here.
            if not isinstance(result, dict):
                continue

            extra = self._get_dict(result, "extra")
            metadata = self._get_dict(extra, "metadata")

            # Rules without a category must not crash the import; they are
            # simply not security findings.
            category = metadata.get("category")
            if not isinstance(category, str) or category.lower() != "security":
                continue

            check_id = result.get("check_id")
            path = result.get("path")
            start_line = None
            end_line = None
            if path:  # line numbers are only kept together with a file
                start_line = self._get_dict(result, "start").get("line")
                end_line = self._get_dict(result, "end").get("line")

            fix = extra.get("fix")
            if fix:
                # Render the proposed fix as a Markdown code block.
                fix = f"```\n{fix}\n```"

            # A missing or unexpected severity ends up as "unknown".
            severity = extra.get("severity")
            so_severity = SEVERITIES.get(
                str(severity).lower(), Severity.SEVERITY_UNKNOWN
            )

            scanner = self.get_name()
            engine_kind = extra.get("engine_kind")
            if engine_kind:
                scanner = f"{scanner} ({engine_kind})"
            scanner = f"{scanner} / {version}"

            observation = Observation(
                title=check_id,
                description=self._get_description(result),
                recommendation=fix,
                parser_severity=so_severity,
                origin_source_file=path,
                origin_source_line_start=start_line,
                origin_source_line_end=end_line,
                scanner=scanner,
            )

            # The complete raw result is attached as evidence.
            observation.unsaved_evidences.append(["Result", dumps(result)])

            observation.unsaved_references = self._get_references(result)

            observations.append(observation)

        return observations

    @staticmethod
    def _get_dict(source: dict, key: str) -> dict:
        """Return ``source[key]`` if it is a dictionary, otherwise an empty one."""
        value = source.get(key)
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _get_list(source: dict, key: str) -> list:
        """Return ``source[key]`` as a list.

        A single string is wrapped into a one-element list; a missing entry
        or any other type yields an empty list.
        """
        value = source.get(key)
        if isinstance(value, str):
            return [value]
        return value if isinstance(value, list) else []

    def _get_description(self, result: dict) -> str:
        """Build the description from the message and the vulnerability classes."""
        extra = self._get_dict(result, "extra")
        message = extra.get("message")
        metadata = self._get_dict(extra, "metadata")
        vulnerability_class = [
            str(entry) for entry in self._get_list(metadata, "vulnerability_class")
        ]

        description = f"{message}"

        if len(vulnerability_class) == 1:
            description += f"\n\n**Vulnerability Class:** {vulnerability_class[0]}"
        if len(vulnerability_class) > 1:
            description += (
                f"\n\n**Vulnerability Classes:** {', '.join(vulnerability_class)}"
            )

        return description

    def _get_references(self, result: dict) -> list[str]:
        """Return the rule's source URL (if any) followed by its references."""
        extra = self._get_dict(result, "extra")
        metadata = self._get_dict(extra, "metadata")

        so_references = []

        source = metadata.get("source")
        if source:
            so_references.append(source)
        so_references.extend(self._get_list(metadata, "references"))

        return so_references
b/backend/unittests/import_observations/parsers/semgrep/files/multiple_observations.json new file mode 100644 index 000000000..65dc8d146 --- /dev/null +++ b/backend/unittests/import_observations/parsers/semgrep/files/multiple_observations.json @@ -0,0 +1,245 @@ +{ + "version": "1.100.0", + "results": [ + { + "check_id": "python.lang.correctness.return-in-init.return-in-init", + "path": "application/api/utils.py", + "start": { + "line": 10, + "col": 44, + "offset": 398 + }, + "end": { + "line": 10, + "col": 95, + "offset": 449 + }, + "extra": { + "message": "`return` should never appear inside a class __init__ function. This will cause a runtime error.", + "metadata": { + "category": "correctness", + "technology": [ + "python" + ], + "license": "Semgrep Rules License v1.0. For more details, visit semgrep.dev/legal/rules-license", + "source": "https://semgrep.dev/r/python.lang.correctness.return-in-init.return-in-init", + "shortlink": "https://sg.run/4xwl", + "semgrep.dev": { + "rule": { + "origin": "community", + "r_id": 9600, + "rule_id": "AbUzYe", + "rv_id": 946298, + "url": "https://semgrep.dev/playground/r/O9TX3WW/python.lang.correctness.return-in-init.return-in-init", + "version_id": "O9TX3WW" + } + } + }, + "severity": "ERROR", + "fingerprint": "requires login", + "lines": "requires login", + "validation_state": "NO_VALIDATOR", + "engine_kind": "OSS" + } + }, + { + "check_id": "python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", + "path": "application/management/commands/command.py", + "start": { + "line": 62, + "col": 17, + "offset": 3082 + }, + "end": { + "line": 62, + "col": 62, + "offset": 3127 + }, + "extra": { + "message": "Avoiding SQL string concatenation: untrusted input concatenated with raw SQL query can result in SQL Injection. In order to execute raw query safely, prepared statement should be used. SQLAlchemy provides TextualSQL to easily used prepared statement with named parameters. 
For complex SQL composition, use SQL Expression Language or Schema Definition Language. In most cases, SQLAlchemy ORM will be a better option.", + "metadata": { + "cwe": [ + "CWE-89: Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection')" + ], + "owasp": [ + "A01:2017 - Injection", + "A03:2021 - Injection" + ], + "references": [ + "https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql", + "https://www.tutorialspoint.com/sqlalchemy/sqlalchemy_quick_guide.htm", + "https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-more-specific-text-with-table-expression-literal-column-and-expression-column" + ], + "category": "security", + "technology": [ + "sqlalchemy" + ], + "cwe2022-top25": true, + "cwe2021-top25": true, + "subcategory": [ + "audit" + ], + "likelihood": "LOW", + "impact": "HIGH", + "confidence": "LOW", + "license": "Semgrep Rules License v1.0. For more details, visit semgrep.dev/legal/rules-license", + "vulnerability_class": [ + "SQL Injection" + ], + "source": "https://semgrep.dev/r/python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", + "shortlink": "https://sg.run/2b1L", + "semgrep.dev": { + "rule": { + "origin": "community", + "r_id": 10563, + "rule_id": "oqUz5y", + "rv_id": 946452, + "url": "https://semgrep.dev/playground/r/8KTKj19/python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", + "version_id": "8KTKj19" + } + } + }, + "severity": "ERROR", + "fingerprint": "requires login", + "lines": "requires login", + "validation_state": "NO_VALIDATOR", + "engine_kind": "OSS" + } + }, + { + "check_id": "python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5", + "path": "application/tools/functions.py", + "start": { + "line": 45, + "col": 24, + "offset": 1496 + }, + "end": { + "line": 46, + "col": 34, + "offset": 1580 + }, + "extra": { + "message": "Detected MD5 hash algorithm which is considered insecure. 
MD5 is not collision resistant and is therefore not suitable as a cryptographic signature. Use SHA256 or SHA3 instead.", + "metadata": { + "source-rule-url": "https://github.com/PyCQA/bandit/blob/d5f8fa0d89d7b11442fc6ec80ca42953974354c8/bandit/blacklists/calls.py#L59", + "cwe": [ + "CWE-327: Use of a Broken or Risky Cryptographic Algorithm" + ], + "owasp": [ + "A03:2017 - Sensitive Data Exposure", + "A02:2021 - Cryptographic Failures" + ], + "bandit-code": "B303", + "asvs": { + "control_id": "6.2.2 Insecure Custom Algorithm", + "control_url": "https://github.com/OWASP/ASVS/blob/master/4.0/en/0x14-V6-Cryptography.md#v62-algorithms", + "section": "V6 Stored Cryptography Verification Requirements", + "version": "4" + }, + "references": [ + "https://www.schneier.com/blog/archives/2012/10/when_will_we_se.html", + "https://www.trendmicro.com/vinfo/us/security/news/vulnerabilities-and-exploits/sha-1-collision-signals-the-end-of-the-algorithm-s-viability", + "http://2012.sharcs.org/slides/stevens.pdf", + "https://pycryptodome.readthedocs.io/en/latest/src/hash/sha3_256.html" + ], + "category": "security", + "technology": [ + "python" + ], + "subcategory": [ + "vuln" + ], + "likelihood": "LOW", + "impact": "MEDIUM", + "confidence": "MEDIUM", + "license": "Semgrep Rules License v1.0. 
For more details, visit semgrep.dev/legal/rules-license", + "vulnerability_class": [ + "Cryptographic Issues", + "Other Issues" + ], + "source": "https://semgrep.dev/r/python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5", + "shortlink": "https://sg.run/vYrY", + "semgrep.dev": { + "rule": { + "origin": "community", + "r_id": 33633, + "rule_id": "PeU2e2", + "rv_id": 946401, + "url": "https://semgrep.dev/playground/r/vdTGnR5/python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5", + "version_id": "vdTGnR5" + } + } + }, + "severity": "WARNING", + "fingerprint": "requires login", + "lines": "requires login", + "validation_state": "NO_VALIDATOR", + "engine_kind": "OSS" + } + }, + { + "check_id": "python.lang.security.use-defusedcsv.use-defusedcsv", + "path": "application/management/commands/command.py", + "start": { + "line": 33, + "col": 18, + "offset": 820 + }, + "end": { + "line": 33, + "col": 50, + "offset": 852 + }, + "extra": { + "message": "Detected the generation of a CSV file using the built-in `csv` module. If user data is used to generate the data in this file, it is possible that an attacker could inject a formula when the CSV is imported into a spreadsheet application that runs an attacker script, which could steal data from the importing user or, at worst, install malware on the user's computer. `defusedcsv` is a drop-in replacement with the same API that will attempt to mitigate formula injection attempts. 
You can use `defusedcsv` instead of `csv` to safely generate CSVs.", + "fix": "defusedcsv.writer(open(file_path, 'w'))", + "metadata": { + "cwe": [ + "CWE-1236: Improper Neutralization of Formula Elements in a CSV File" + ], + "owasp": [ + "A01:2017 - Injection", + "A03:2021 - Injection" + ], + "references": [ + "https://github.com/raphaelm/defusedcsv", + "https://owasp.org/www-community/attacks/CSV_Injection", + "https://web.archive.org/web/20220516052229/https://www.contextis.com/us/blog/comma-separated-vulnerabilities" + ], + "category": "security", + "technology": [ + "python" + ], + "confidence": "LOW", + "subcategory": [ + "audit" + ], + "likelihood": "LOW", + "impact": "LOW", + "license": "Semgrep Rules License v1.0. For more details, visit semgrep.dev/legal/rules-license", + "vulnerability_class": [ + "Improper Validation" + ], + "source": "https://semgrep.dev/r/python.lang.security.use-defusedcsv.use-defusedcsv", + "shortlink": "https://sg.run/GzRn", + "semgrep.dev": { + "rule": { + "origin": "community", + "r_id": 31148, + "rule_id": "gxUrAb", + "rv_id": 946409, + "url": "https://semgrep.dev/playground/r/gETe1DO/python.lang.security.use-defusedcsv.use-defusedcsv", + "version_id": "gETe1DO" + } + } + }, + "severity": "INFO", + "fingerprint": "requires login", + "lines": "requires login", + "validation_state": "NO_VALIDATOR", + "engine_kind": "OSS" + } + } + ] +} diff --git a/backend/unittests/import_observations/parsers/semgrep/files/no_observation.json b/backend/unittests/import_observations/parsers/semgrep/files/no_observation.json new file mode 100644 index 000000000..b4ed2e6a0 --- /dev/null +++ b/backend/unittests/import_observations/parsers/semgrep/files/no_observation.json @@ -0,0 +1,4 @@ +{ + "version": "1.100.0", + "results": [] +} diff --git a/backend/unittests/import_observations/parsers/semgrep/files/wrong_format.json b/backend/unittests/import_observations/parsers/semgrep/files/wrong_format.json new file mode 100644 index 000000000..f43c3e770 
from os import path
from unittest import TestCase

from application.import_observations.parsers.semgrep.parser import SemgrepParser


class TestSemgrepParser(TestCase):
    """Tests for SemgrepParser, driven by the files stored next to this module."""

    def _check_file(self, relative_name):
        """Run ``check_format`` on a file located relative to this module.

        Returns the parser together with the three values of ``check_format``.
        """
        semgrep_parser = SemgrepParser()
        with open(path.dirname(__file__) + relative_name) as testfile:
            is_valid, errors, parsed = semgrep_parser.check_format(testfile)
        return semgrep_parser, is_valid, errors, parsed

    def test_no_json(self):
        """The test module itself is not JSON and has to be rejected."""
        _, is_valid, errors, parsed = self._check_file("/test_parser.py")

        self.assertFalse(is_valid)
        self.assertEqual(1, len(errors))
        self.assertEqual("File is not valid JSON", errors[0])
        self.assertFalse(parsed)

    def test_wrong_format(self):
        """JSON without a version entry is not accepted as a Semgrep report."""
        _, is_valid, errors, parsed = self._check_file("/files/wrong_format.json")

        self.assertFalse(is_valid)
        self.assertEqual(1, len(errors))
        self.assertEqual(
            "File is not a Semgrep format, version is missing", errors[0]
        )
        self.assertFalse(parsed)

    def test_no_observation(self):
        """A report with an empty result list is valid and yields nothing."""
        semgrep_parser, is_valid, errors, parsed = self._check_file(
            "/files/no_observation.json"
        )
        found = semgrep_parser.get_observations(parsed)

        self.assertTrue(is_valid)
        self.assertEqual(0, len(errors))
        self.assertEqual(0, len(found))

    def test_multiple_observations(self):
        """Only the three security results of the four in the report are imported."""
        semgrep_parser, is_valid, errors, parsed = self._check_file(
            "/files/multiple_observations.json"
        )
        found = semgrep_parser.get_observations(parsed)

        self.assertTrue(is_valid)
        self.assertEqual(0, len(errors))
        self.assertEqual(3, len(found))

        # First security result: SQL injection, no fix proposed.
        sql_injection = found[0]
        self.assertEqual(
            "python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query",
            sql_injection.title,
        )
        expected_description = """Avoiding SQL string concatenation: untrusted input concatenated with raw SQL query can result in SQL Injection. In order to execute raw query safely, prepared statement should be used. SQLAlchemy provides TextualSQL to easily used prepared statement with named parameters. For complex SQL composition, use SQL Expression Language or Schema Definition Language. In most cases, SQLAlchemy ORM will be a better option.

**Vulnerability Class:** SQL Injection"""
        self.assertEqual(expected_description, sql_injection.description)
        self.assertEqual(None, sql_injection.recommendation)
        self.assertEqual("High", sql_injection.parser_severity)
        self.assertEqual(
            "application/management/commands/command.py",
            sql_injection.origin_source_file,
        )
        self.assertEqual(62, sql_injection.origin_source_line_start)
        self.assertEqual(62, sql_injection.origin_source_line_end)
        self.assertEqual("Semgrep (OSS) / 1.100.0", sql_injection.scanner)
        self.assertEqual(4, len(sql_injection.unsaved_references))
        self.assertEqual(
            "https://semgrep.dev/r/python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query",
            sql_injection.unsaved_references[0],
        )
        self.assertEqual(
            "https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql",
            sql_injection.unsaved_references[1],
        )
        self.assertEqual(1, len(sql_injection.unsaved_evidences))
        self.assertEqual("Result", sql_injection.unsaved_evidences[0][0])
        self.assertIn("oqUz5y", sql_injection.unsaved_evidences[0][1])

        # Second security result: two vulnerability classes, severity WARNING.
        md5_usage = found[1]
        self.assertEqual(
            "python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5",
            md5_usage.title,
        )
        expected_description = """Detected MD5 hash algorithm which is considered insecure. MD5 is not collision resistant and is therefore not suitable as a cryptographic signature. Use SHA256 or SHA3 instead.

**Vulnerability Classes:** Cryptographic Issues, Other Issues"""
        self.assertEqual(expected_description, md5_usage.description)
        self.assertEqual(None, md5_usage.recommendation)
        self.assertEqual("Medium", md5_usage.parser_severity)

        # Third security result: carries a fix, rendered as a code block.
        csv_usage = found[2]
        self.assertEqual(
            "python.lang.security.use-defusedcsv.use-defusedcsv", csv_usage.title
        )
        expected_recommendation = """```
defusedcsv.writer(open(file_path, 'w'))
```"""
        self.assertEqual(expected_recommendation, csv_usage.recommendation)
        self.assertEqual("Low", csv_usage.parser_severity)
- report_name: 'dd_import_kics.sarif' + report_name: 'backend_kics.sarif' so_api_base_url: ${{ vars.SO_API_BASE_URL }} so_api_token: ${{ secrets.SO_API_TOKEN }} so_product_name: ${{ vars.SO_PRODUCT_NAME }} @@ -118,7 +118,7 @@ jobs: uses: MaibornWolff/secobserve_actions_templates/actions/SAST/checkov@main with: target: '.' - report_name: 'dd_import_checkov.sarif' + report_name: 'backend_checkov.sarif' so_api_base_url: ${{ vars.SO_API_BASE_URL }} so_api_token: ${{ secrets.SO_API_TOKEN }} so_product_name: ${{ vars.SO_PRODUCT_NAME }} @@ -126,8 +126,8 @@ jobs: - name: Run Trivy image uses: MaibornWolff/secobserve_actions_templates/actions/SCA/trivy_image@main with: - target: 'maibornwolff/dd-import:latest' - report_name: 'dd_import_trivy_image.json' + target: 'maibornwolff/secobserve-backend:latest' + report_name: 'backend_trivy_image.json' so_api_base_url: ${{ vars.SO_API_BASE_URL }} so_api_token: ${{ secrets.SO_API_TOKEN }} so_product_name: ${{ vars.SO_PRODUCT_NAME }} @@ -135,8 +135,8 @@ jobs: - name: Run Grype image uses: MaibornWolff/secobserve_actions_templates/actions/SCA/grype_image@main with: - target: 'maibornwolff/dd-import:latest' - report_name: 'dd_import_grype_image.json' + target: 'maibornwolff/secobserve-backend:latest' + report_name: 'backend_grype_image.json' so_api_base_url: ${{ vars.SO_API_BASE_URL }} so_api_token: ${{ secrets.SO_API_TOKEN }} so_product_name: ${{ vars.SO_PRODUCT_NAME }} @@ -144,7 +144,7 @@ jobs: - name: Run Gitleaks uses: MaibornWolff/secobserve_actions_templates/actions/secrets/gitleaks@main with: - report_name: 'dd_import_gitleaks.sarif' + report_name: 'backend_gitleaks.sarif' so_api_base_url: ${{ vars.SO_API_BASE_URL }} so_api_token: ${{ secrets.SO_API_TOKEN }} so_product_name: ${{ vars.SO_PRODUCT_NAME }} @@ -154,13 +154,13 @@ jobs: with: name: secobserve path: | - dd_import_bandit.sarif - dd_import_semgrep.json - dd_import_kics.sarif - dd_import_checkov.sarif - dd_import_trivy_image.json - dd_import_grype_image.json - 
dd_import_gitleaks.sarif + backend_bandit.sarif + backend_semgrep.json + backend_kics.sarif + backend_checkov.sarif + backend_trivy_image.json + backend_grype_image.json + backend_gitleaks.sarif ``` ## Examplary pipeline for GitLab CI templates @@ -377,7 +377,7 @@ semgrep_backend: RUN_DIRECTORY: "." CONFIGURATION: "r/python" TARGET: "backend" - REPORT_NAME: "semgrep_backend.sarif" + REPORT_NAME: "semgrep_backend.json" SO_ORIGIN_SERVICE: "backend" SO_BRANCH_NAME: $GITHUB_REF_NAME @@ -386,7 +386,7 @@ semgrep_frontend: RUN_DIRECTORY: "." CONFIGURATION: "r/typescript" TARGET: "frontend/src" - REPORT_NAME: "semgrep_frontend.sarif" + REPORT_NAME: "semgrep_frontend.json" SO_ORIGIN_SERVICE: "frontend" SO_BRANCH_NAME: $GITHUB_REF_NAME diff --git a/docs/integrations/supported_scanners.md b/docs/integrations/supported_scanners.md index 8065bf72d..7a33e359e 100644 --- a/docs/integrations/supported_scanners.md +++ b/docs/integrations/supported_scanners.md @@ -39,7 +39,7 @@ These scanners have been tested with SecObserve: | [Bandit](https://bandit.readthedocs.io/en/latest) | SARIF | File | | [ESLint](https://github.com/nodesecurity/eslint-plugin-security) | SARIF | File | | [Find-Sec-Bugs](https://find-sec-bugs.github.io) | SARIF | File | -| [Semgrep](https://semgrep.dev/docs) | SARIF | File | +| [Semgrep](https://semgrep.dev/docs) | Semgrep | File | | **SAST infrastructure** | | [Checkov](https://www.checkov.io/1.Welcome/Quick%20Start.html) | SARIF | File | | [KICS](https://docs.kics.io/latest) | SARIF | File | From e2ce44bc5c4bf18bd55d5d0b466fe808cc181e1b Mon Sep 17 00:00:00 2001 From: Stefan Fleckenstein Date: Mon, 16 Dec 2024 08:44:10 +0100 Subject: [PATCH 2/3] chore: code quality --- .../parsers/semgrep/parser.py | 6 ++-- .../parsers/semgrep/test_parser.py | 31 ++++++++++++++----- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/backend/application/import_observations/parsers/semgrep/parser.py b/backend/application/import_observations/parsers/semgrep/parser.py 
index 38f2a007c..9b338c933 100644 --- a/backend/application/import_observations/parsers/semgrep/parser.py +++ b/backend/application/import_observations/parsers/semgrep/parser.py @@ -62,7 +62,7 @@ def get_observations(self, data: dict) -> list[Observation]: version = data.get("version") - for result in data.get("results"): + for result in data.get("results", {}): extra = result.get("extra", {}) metadata = extra.get("metadata", {}) category = metadata.get("category") @@ -122,7 +122,9 @@ def _get_description(self, result: dict) -> str: if len(vulnerability_class) == 1: description += f"\n\n**Vulnerability Class:** {vulnerability_class[0]}" if len(vulnerability_class) > 1: - description += f"\n\n**Vulnerability Classes:** {', '.join(vulnerability_class)}" + description += ( + f"\n\n**Vulnerability Classes:** {', '.join(vulnerability_class)}" + ) return description diff --git a/backend/unittests/import_observations/parsers/semgrep/test_parser.py b/backend/unittests/import_observations/parsers/semgrep/test_parser.py index a6b26659d..8824e063c 100644 --- a/backend/unittests/import_observations/parsers/semgrep/test_parser.py +++ b/backend/unittests/import_observations/parsers/semgrep/test_parser.py @@ -22,7 +22,9 @@ def test_wrong_format(self): self.assertFalse(check) self.assertEqual(1, len(messages)) - self.assertEqual("File is not a Semgrep format, version is missing", messages[0]) + self.assertEqual( + "File is not a Semgrep format, version is missing", messages[0] + ) self.assertFalse(data) def test_no_observation(self): @@ -48,7 +50,10 @@ def test_multiple_observations(self): self.assertEqual(3, len(observations)) observation = observations[0] - self.assertEqual("python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", observation.title) + self.assertEqual( + "python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", + observation.title, + ) description = """Avoiding SQL string concatenation: untrusted input 
concatenated with raw SQL query can result in SQL Injection. In order to execute raw query safely, prepared statement should be used. SQLAlchemy provides TextualSQL to easily used prepared statement with named parameters. For complex SQL composition, use SQL Expression Language or Schema Definition Language. In most cases, SQLAlchemy ORM will be a better option. **Vulnerability Class:** SQL Injection""" @@ -56,20 +61,30 @@ def test_multiple_observations(self): self.assertEqual(None, observation.recommendation) self.assertEqual("High", observation.parser_severity) self.assertEqual( - "application/management/commands/command.py", observation.origin_source_file + "application/management/commands/command.py", + observation.origin_source_file, ) self.assertEqual(62, observation.origin_source_line_start) self.assertEqual(62, observation.origin_source_line_end) self.assertEqual("Semgrep (OSS) / 1.100.0", observation.scanner) self.assertEqual(4, len(observation.unsaved_references)) - self.assertEqual("https://semgrep.dev/r/python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", observation.unsaved_references[0]) - self.assertEqual("https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql", observation.unsaved_references[1]) + self.assertEqual( + "https://semgrep.dev/r/python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query", + observation.unsaved_references[0], + ) + self.assertEqual( + "https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql", + observation.unsaved_references[1], + ) self.assertEqual(1, len(observation.unsaved_evidences)) self.assertEqual("Result", observation.unsaved_evidences[0][0]) self.assertIn("oqUz5y", observation.unsaved_evidences[0][1]) observation = observations[1] - self.assertEqual("python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5", observation.title) + self.assertEqual( + 
"python.lang.security.insecure-hash-algorithms-md5.insecure-hash-algorithm-md5", + observation.title, + ) description = """Detected MD5 hash algorithm which is considered insecure. MD5 is not collision resistant and is therefore not suitable as a cryptographic signature. Use SHA256 or SHA3 instead. **Vulnerability Classes:** Cryptographic Issues, Other Issues""" @@ -78,7 +93,9 @@ def test_multiple_observations(self): self.assertEqual("Medium", observation.parser_severity) observation = observations[2] - self.assertEqual("python.lang.security.use-defusedcsv.use-defusedcsv", observation.title) + self.assertEqual( + "python.lang.security.use-defusedcsv.use-defusedcsv", observation.title + ) recommendation = """``` defusedcsv.writer(open(file_path, 'w')) ```""" From 92044e07ccf3d0fb228d19c76696b406e22f2598 Mon Sep 17 00:00:00 2001 From: Stefan Fleckenstein Date: Mon, 16 Dec 2024 17:08:47 +0100 Subject: [PATCH 3/3] chore: unittests --- .../import_observations/management/test_register_parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/unittests/import_observations/management/test_register_parsers.py b/backend/unittests/import_observations/management/test_register_parsers.py index a27dab7e9..6707bfd94 100644 --- a/backend/unittests/import_observations/management/test_register_parsers.py +++ b/backend/unittests/import_observations/management/test_register_parsers.py @@ -17,7 +17,7 @@ def test_register_parsers(self): command.handle() parsers = Parser.objects.all().order_by("name") - self.assertEqual(12, len(parsers)) + self.assertEqual(13, len(parsers)) parser = parsers[0] self.assertEqual("Azure Defender", parser.name) @@ -40,7 +40,7 @@ def test_register_parsers(self): self.assertEqual("", parser.module_name) self.assertEqual("", parser.class_name) - parser = parsers[10] + parser = parsers[11] self.assertEqual("Trivy Operator Prometheus", parser.name) self.assertEqual("Other", parser.type) self.assertEqual("API", parser.source)