reimplement rule loading mechanic (#754)
* update CHANGELOG
* enable coverage for vscode testing
* add rule loader
* rename _create_from_dict to create_from_dict
* add method to registry to get the rule class by rule_definition
* remove list_json_or_yaml_files function
ekneg54 authored Feb 10, 2025
1 parent b949d7b commit 580d936
Showing 61 changed files with 1,076 additions and 534 deletions.
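For orientation, here is a minimal sketch of the renamed public API: rules are now built with `create_from_dict` instead of the private `_create_from_dict`, as reflected in the notebook diffs below. The import path and the rule body are illustrative assumptions, not taken verbatim from this commit.

```python
# Illustrative only: building a rule from a dict with the renamed classmethod.
# The module path and the rule body are assumptions, not part of this commit.
from logprep.processor.field_manager.rule import FieldManagerRule

rule_definition = {
    "filter": "message",
    "field_manager": {
        "source_fields": ["message"],
        "target_field": "event.original",
    },
}

rule = FieldManagerRule.create_from_dict(rule_definition)
```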
23 changes: 1 addition & 22 deletions .github/workflows/ci.yml
@@ -1,98 +1,77 @@
name: CI

# only run tests for pull requests cause no file has to be changed without review
# open -> open the pull request
# synchronize -> push to branch of pull request
on:
pull_request:
types: [opened, synchronize]

jobs:
test:
uses: ./.github/workflows/testing.yml

build-docs:
runs-on: ubuntu-24.04

strategy:
matrix:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Install dependencies
run: |
sudo apt-get update && sudo apt-get -y install pandoc
pip install --upgrade pip wheel
pip install .[doc]
- name: build docs
run: |
cd doc
sphinx-apidoc -fT -o source/module_reference ../logprep
make clean html
code-quality:
runs-on: ubuntu-24.04

strategy:
matrix:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: azure/[email protected]
with:
version: "latest"
id: install

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Get changed python files
id: changed-files
uses: tj-actions/changed-files@v41
with:
files: |
**/*.py
- name: Install dependencies
run: |
pip install --upgrade pip wheel
pip install .[dev]
- name: check black formating
run: |
black --check --diff --config ./pyproject.toml .
- name: lint helm charts
run: |
helm lint --strict ./charts/logprep
- name: lint changed and added files
if: steps.changed-files.outputs.all_changed_files
run: |
pylint --rcfile=.pylintrc --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }}
pylint --fail-under 9.5 ${{ steps.changed-files.outputs.all_changed_files }}
- name: Run tests and collect coverage
run: pytest tests/unit --cov=logprep --cov-report=xml

- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v2

containerbuild:
uses: ./.github/workflows/container-build.yml
secrets: inherit
1 change: 1 addition & 0 deletions .gitignore
@@ -29,3 +29,4 @@ examples/k8s/charts
target
wheelhouse
requirements.*
.mypy_cache
8 changes: 7 additions & 1 deletion .vscode/settings.json
@@ -31,6 +31,9 @@
"**/.profile": true
},
"python.analysis.typeCheckingMode": "off",
"python.experiments.optInto": [
"pythonTestAdapter"
],
"python.terminal.activateEnvInCurrentTerminal": true,
"python.analysis.enablePytestSupport": true,
"python.testing.pytestArgs": [
@@ -40,4 +43,7 @@
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
}
"cSpell.words": [
"concatenator"
],
}
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -6,8 +6,12 @@
* remove `hyperscan_resolver` processor because it is not significantly faster as the `generic_resolver` with enabled cache

### Features

* add support for rule files with suffix `.yaml`

### Improvements
* removes `colorama` dependency
* reimplemented the rule loading mechanic

### Bugfix
* fixes a bug with lucene regex and parentheses
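The new `.yaml` suffix support can be pictured with a small sketch; the constant and helper names below are illustrative, not the ones used inside logprep's rule loader.

```python
# Illustrative sketch of the CHANGELOG entry: rule files ending in ".yaml"
# are now picked up alongside ".yml" and ".json". All names are hypothetical.
from pathlib import Path

RULE_FILE_SUFFIXES = {".json", ".yml", ".yaml"}


def collect_rule_files(directory: str) -> list[str]:
    """Return all files below *directory* whose suffix marks them as rule files."""
    return sorted(
        str(path)
        for path in Path(directory).rglob("*")
        if path.is_file() and path.suffix in RULE_FILE_SUFFIXES
    )
```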
@@ -92,7 +92,7 @@
" },\n",
" }\n",
"]\n",
"rules = [FieldManagerRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n",
"rules = [FieldManagerRule.create_from_dict(rule_dict) for rule_dict in rules_definitions]\n",
"rules"
]
},

@@ -176,7 +176,7 @@
" }\n",
"}\n",
"\n",
"rule = IpInformerRule._create_from_dict(rule_definition)\n"
"rule = IpInformerRule.create_from_dict(rule_definition)\n"
]
},
{

@@ -83,7 +83,7 @@
" },\n",
" }\n",
"]\n",
"rules = [StringSplitterRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n",
"rules = [StringSplitterRule.create_from_dict(rule_dict) for rule_dict in rules_definitions]\n",
"rules"
]
},
16 changes: 8 additions & 8 deletions examples/compose/docker-compose.yml
@@ -3,7 +3,7 @@

services:
opensearch:
image: public.ecr.aws/opensearchproject/opensearch:2
image: registry-1.docker.io/opensearchproject/opensearch:2
hostname: opensearch
container_name: opensearch
environment:
@@ -29,7 +29,7 @@ services:
networks:
- opensearch
dashboards:
image: public.ecr.aws/opensearchproject/opensearch-dashboards:2
image: registry-1.docker.io/opensearchproject/opensearch-dashboards:2
container_name: dashboards
environment:
- 'OPENSEARCH_HOSTS=["http://opensearch:9200"]'
@@ -41,7 +41,7 @@
networks:
- opensearch
kafka:
image: bitnami/kafka:3.4
image: bitnami/kafka:3.9
container_name: kafka
hostname: kafka
ports:
@@ -61,11 +61,11 @@
volumes:
- /var/run/docker.sock:/var/run/docker.sock
command: |
sh -c
"((sleep 15 && echo 'kafka up' &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic consumer &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic errors &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic producer)&) &&
sh -c
"((sleep 15 && echo 'kafka up' &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic consumer &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic errors &&
kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 4 --topic producer)&) &&
/opt/bitnami/scripts/kafka/run.sh"
healthcheck:
test:
2 changes: 1 addition & 1 deletion logprep/abc/getter.py
@@ -103,7 +103,7 @@ def get_jsonl(self) -> List:
return parsed_events

@abstractmethod
def get_raw(self) -> bytearray:
def get_raw(self) -> bytes:
"""Get the content.
Returns
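The tightened `get_raw` signature means getters now hand back immutable `bytes`. The subclass below is a hypothetical illustration of the contract, not the getter implementation shipped with logprep.

```python
# Hypothetical getter honoring the new contract: get_raw() returns bytes,
# not a mutable bytearray.
from pathlib import Path


class ExampleFileGetter:
    def __init__(self, target: str) -> None:
        self.target = target

    def get_raw(self) -> bytes:
        """Get the raw file content as immutable bytes."""
        return Path(self.target).read_bytes()
```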
72 changes: 19 additions & 53 deletions logprep/abc/processor.py
@@ -3,8 +3,7 @@
import logging
import os
from abc import abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional
from typing import TYPE_CHECKING, ClassVar, Dict, List, Type

from attr import define, field, validators

@@ -16,14 +15,13 @@
ProcessingError,
ProcessingWarning,
)
from logprep.util import getter
from logprep.util.helper import (
add_and_overwrite,
add_fields_to,
get_dotted_field_value,
pop_dotted_field_value,
)
from logprep.util.json_handling import list_json_files_in_directory
from logprep.util.rule_loader import RuleLoader

if TYPE_CHECKING:
from logprep.processor.base.rule import Rule # pragma: no cover
@@ -95,31 +93,28 @@ class Config(Component.Config):
For valid URI formats see :ref:`getters`.
As last option it is possible to define entire rules with all their configuration parameters as list elements.
"""
tree_config: Optional[str] = field(
default=None, validator=[validators.optional(validators.instance_of(str))]
tree_config: str | None = field(
default=None, validator=validators.instance_of((str, type(None)))
)
"""Path to a JSON file with a valid :ref:`Rule Tree Configuration`.
For string format see :ref:`getters`."""
apply_multiple_times: Optional[bool] = field(
default=False, validator=[validators.optional(validators.instance_of(bool))]
)
apply_multiple_times: bool = field(default=False, validator=validators.instance_of(bool))
"""Set if the processor should be applied multiple times. This enables further processing
of an output with the same processor."""

__slots__ = [
"rule_class",
"_event",
"_rule_tree",
"result",
"_bypass_rule_tree",
]

rule_class: "Rule"
rule_class: ClassVar["Type[Rule] | None"] = None
_event: dict
_rule_tree: RuleTree
_strategy = None
_bypass_rule_tree: bool
result: ProcessorResult
result: ProcessorResult | None

def __init__(self, name: str, configuration: "Processor.Config"):
super().__init__(name, configuration)
@@ -167,7 +162,7 @@ def process(self, event: dict) -> ProcessorResult:
"""
self.result = ProcessorResult(processor_name=self.name, event=event)
logger.debug(f"{self.describe()} processing event {event}")
logger.debug("%s processing event %s", self.describe(), event)
if self._bypass_rule_tree:
self._process_all_rules(event)
return self.result
@@ -233,7 +228,7 @@ def _apply_rules_wrapper(self, event: dict, rule: "Rule"):
@abstractmethod
def _apply_rules(self, event, rule): ... # pragma: no cover

def test_rules(self) -> dict:
def test_rules(self) -> dict | None:
"""Perform custom rule tests.
Returns a dict with a list of test results as tuples containing a result and an expected
@@ -242,47 +237,18 @@ def test_rules(self) -> dict:
"""

@staticmethod
def resolve_directories(rule_sources: list) -> list:
"""resolves directories to a list of files or rule definitions
Parameters
----------
rule_sources : list
a list of files, directories or rule definitions
Returns
-------
list
a list of files and rule definitions
"""
resolved_sources = []
for rule_source in rule_sources:
if isinstance(rule_source, dict):
resolved_sources.append(rule_source)
continue
getter_instance = getter.GetterFactory.from_string(rule_source)
if getter_instance.protocol == "file":
if Path(getter_instance.target).is_dir():
paths = list_json_files_in_directory(getter_instance.target)
for file_path in paths:
resolved_sources.append(file_path)
else:
resolved_sources.append(rule_source)
else:
resolved_sources.append(rule_source)
return resolved_sources

def load_rules(self, rules_targets: List[str]):
def load_rules(self, rules_targets: List[str | Dict]) -> None:
"""method to add rules from directories or urls"""
rules_targets = self.resolve_directories(rules_targets)
for rules_target in rules_targets:
rules = self.rule_class.create_rules_from_target(rules_target, self.name)
for rule in rules:
self._rule_tree.add_rule(rule)
if logger.isEnabledFor(logging.DEBUG): # pragma: no cover
try:
rules = RuleLoader(rules_targets, self.name).rules
except ValueError as error:
logger.error("Loading rules from %s failed: %s ", rules_targets, error)
raise error
for rule in rules:
self._rule_tree.add_rule(rule)
if logger.isEnabledFor(logging.DEBUG):
number_rules = self._rule_tree.number_of_rules
logger.debug(f"{self.describe()} loaded {number_rules} rules")
logger.debug("%s loaded %s rules", self.describe(), number_rules)

@staticmethod
def _field_exists(event: dict, dotted_field: str) -> bool:
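In effect, `load_rules` now hands the whole mixed target list to `RuleLoader` and only populates the rule tree. A usage sketch, assuming the constructor signature shown in the diff (`RuleLoader(targets, name).rules`); the target values themselves are hypothetical:

```python
# Sketch of the new loading path, assuming RuleLoader(targets, name).rules as
# shown in the diff above. Paths and the inline rule dict are hypothetical.
from logprep.util.rule_loader import RuleLoader

rules_targets = [
    "rules/",                  # a directory of rule files
    "rules/extra_rule.yaml",   # a single file; .yaml is now accepted
    {                          # an inline rule definition
        "filter": "message",
        "field_manager": {
            "source_fields": ["message"],
            "target_field": "event.original",
        },
    },
]

rules = RuleLoader(rules_targets, "my_processor").rules
```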
10 changes: 3 additions & 7 deletions logprep/factory.py
@@ -1,23 +1,18 @@
"""This module contains a factory to create connectors and processors."""

import copy
from typing import TYPE_CHECKING

from logprep.abc.component import Component
from logprep.configuration import Configuration
from logprep.factory_error import (
InvalidConfigSpecificationError,
InvalidConfigurationError,
)

if TYPE_CHECKING: # pragma: no cover
from logprep.abc.component import Component


class Factory:
"""Create components for logprep."""

@classmethod
def create(cls, configuration: dict) -> "Component":
def create(cls, configuration: dict) -> Component | None:
"""Create component."""
if configuration == {} or configuration is None:
raise InvalidConfigurationError("The component definition is empty.")
@@ -40,3 +35,4 @@ def create(cls, configuration: dict) -> "Component":
component_name, component_configuration_dict
)
return component(component_name, component_configuration)
return None
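With the adjusted annotation, callers can treat the result as optional. A hedged usage sketch follows; the component definition is a hypothetical example, not taken from the logprep documentation.

```python
# Hedged sketch: Factory.create() is now annotated as Component | None,
# so callers may guard against None. The configuration dict is hypothetical.
from logprep.factory import Factory

component = Factory.create({"my dissector": {"type": "dissector", "rules": []}})
if component is None:
    raise ValueError("no component could be created from the given configuration")
```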