From eddc39135dbd94490cee93024743a107cf96fe57 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 28 Nov 2024 22:44:32 -0500 Subject: [PATCH 01/54] feat: add monitring component --- src/solace_ai_connector/common/monitoring.py | 70 +++++++++++++++++++ .../components/component_base.py | 27 +++++++ .../components/inputs_outputs/broker_base.py | 12 ++++ .../solace_ai_connector.py | 2 + 4 files changed, 111 insertions(+) create mode 100644 src/solace_ai_connector/common/monitoring.py diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py new file mode 100644 index 00000000..ff3b4bc3 --- /dev/null +++ b/src/solace_ai_connector/common/monitoring.py @@ -0,0 +1,70 @@ +from typing import Any +from datadog import initialize, statsd + +from .log import log + + +class Monitoring: + """ + A singleton class to collect and send metrics to Datadog. + """ + + _instance = None + _initialized = False + + def __new__(cls, *args, **kwargs): + if not cls._instance: + cls._instance = super(Monitoring, cls).__new__(cls) + return cls._instance + + def __init__(self, config: dict[str, Any] = None) -> None: + """ + Initialize the MetricCollector with Datadog configuration. + + :param config: Configuration for Datadog + """ + + if self._initialized: + return + + self.enabled = False + + monitoring = config.get("monitoring", {}) + if monitoring is not {}: + self.enabled = monitoring.get("enabled", False) + tags = monitoring.get("tags", []) + if "host" not in monitoring: + log.error( + "Monitoring configuration is missing host. Disabling monitoring." + ) + self.enabled = False + else: + host = monitoring.get("host") + if "port" not in monitoring: + log.error( + "Monitoring configuration is missing port. Disabling monitoring." + ) + self.enabled = False + else: + port = monitoring.get("port") + + # Initialize Datadog with provided options + if self.enabled: + options = { + "statsd_constant_tags": tags, + "statsd_host": host, + "statsd_port": port, + } + + initialize(**options) + self._initialized = True + + def send_metric(self, metric_name: str, metric_value: Any) -> None: + """ + Send a metric to Datadog. 
+ + :param metric_name: Name of the metric + :param metric_value: Value of the metric + """ + if self.enabled: + statsd.gauge(metric_name, metric_value) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 63dd8fb3..45d87d92 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -2,7 +2,9 @@ import queue import traceback import pprint +import time from abc import abstractmethod +from typing import Any from ..common.log import log from ..common.utils import resolve_config_values from ..common.utils import get_source_expression @@ -11,6 +13,7 @@ from ..common.trace_message import TraceMessage from ..common.event import Event, EventType from ..flow.request_response_flow_controller import RequestResponseFlowController +from ..common.monitoring import Monitoring DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -65,6 +68,10 @@ def create_thread_and_run(self): return self.thread def run(self): + # Start the micro monitoring thread + monitoring_thread = threading.Thread(target=self.run_micro_monitoring) + monitoring_thread.start() + # Process events until the stop signal is set while not self.stop_signal.is_set(): event = None try: @@ -77,6 +84,7 @@ def run(self): self.handle_component_error(e, event) self.stop_component() + monitoring_thread.join() def process_event_with_tracing(self, event): if self.trace_queue: @@ -452,3 +460,22 @@ def do_broker_request_response( raise ValueError( f"Broker request response controller not found for component {self.name}" ) + + def get_metrics(self) -> dict[str, Any]: + return {} + + def run_micro_monitoring(self) -> None: + """ + Start the metric collection and sending process in a loop. + """ + monitoring = Monitoring() + try: + while not self.stop_signal.is_set(): + # Collect and send metrics every 60 seconds + metrics = self.get_metrics() + for metric_name, metric_value in metrics.items(): + monitoring.send_metric(metric_name, metric_value) + log.info("Sent metric %s: %s", metric_name, metric_value) + time.sleep(10) + except KeyboardInterrupt: + log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index c312740b..39b150c3 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -1,13 +1,17 @@ """Base class for broker input/output components for the Solace AI Event Connector""" import uuid +import json +from typing import List from abc import abstractmethod +from solace.messaging.utils.manageable import ApiMetrics, Metric from ..component_base import ComponentBase from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload +from ...common.log import log # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. 
The changes @@ -30,6 +34,7 @@ class BrokerBase(ComponentBase): + def __init__(self, module_info, **kwargs): super().__init__(module_info, **kwargs) self.broker_properties = self.get_broker_properties() @@ -105,3 +110,10 @@ def start(self): def generate_uuid(self): return str(uuid.uuid4()) + + def get_metrics(self): + metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() + str_metrics = str(metrics) + stats_dict = json.loads(str_metrics) + log.debug(f"API metrics: {stats_dict}\n") + return stats_dict diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index ec2d5f07..80560cfe 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -11,6 +11,7 @@ from .flow.timer_manager import TimerManager from .common.event import Event, EventType from .services.cache_service import CacheService, create_storage_backend +from .common.monitoring import Monitoring class SolaceAiConnector: @@ -32,6 +33,7 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.instance_name = self.config.get("instance_name", "solace_ai_connector") self.timer_manager = TimerManager(self.stop_signal) self.cache_service = self.setup_cache_service() + self.monitoring = Monitoring(config) def run(self): """Run the Solace AI Event Connector""" From a955cfb21a254ed23ba1a76402d876652bd87c29 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 28 Nov 2024 23:49:32 -0500 Subject: [PATCH 02/54] fix: resolve a bug --- .../components/inputs_outputs/broker_request_response.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index bdaea627..3a1dfe2d 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -439,3 +439,7 @@ def cleanup(self): if self.response_thread: self.response_thread.join() super().cleanup() + + def get_metrics(self): + # override because it removes messaging_service from the BrokerBase + return {} From 2b271e3a583ce68981b3326deef109e4cb908994 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 29 Nov 2024 14:13:12 -0500 Subject: [PATCH 03/54] fix: add sleep time --- src/solace_ai_connector/components/component_base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 45d87d92..31e9d987 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -17,6 +17,7 @@ DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 +DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME = 60 class ComponentBase: @@ -79,8 +80,10 @@ def run(self): if event is not None: self.process_event_with_tracing(event) except AssertionError as e: + time.sleep(DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME) raise e except Exception as e: + time.sleep(DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME) self.handle_component_error(e, event) self.stop_component() @@ -475,7 +478,7 @@ def run_micro_monitoring(self) -> None: metrics = self.get_metrics() for metric_name, metric_value in metrics.items(): monitoring.send_metric(metric_name, metric_value) - log.info("Sent metric %s: %s", metric_name, metric_value) + log.debug("Sent metric %s: %s", metric_name, 
metric_value) time.sleep(10) except KeyboardInterrupt: log.info("Monitoring stopped.") From 507504386f409b1db35057624248dc0fdd16761c Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 29 Nov 2024 14:14:14 -0500 Subject: [PATCH 04/54] fix: add sleep time --- src/solace_ai_connector/components/component_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 31e9d987..57c115bf 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -17,7 +17,7 @@ DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 -DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME = 60 +DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME = 10 class ComponentBase: @@ -474,7 +474,7 @@ def run_micro_monitoring(self) -> None: monitoring = Monitoring() try: while not self.stop_signal.is_set(): - # Collect and send metrics every 60 seconds + # Collect and send metrics every 10 seconds metrics = self.get_metrics() for metric_name, metric_value in metrics.items(): monitoring.send_metric(metric_name, metric_value) From 03d8b75f5f706cebef4e55e1e5291a4cc664c475 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 29 Nov 2024 16:41:00 -0500 Subject: [PATCH 05/54] feat: add readiness and handle excessive logs --- src/solace_ai_connector/common/monitoring.py | 18 ++++++++++++++++++ .../components/component_base.py | 16 +++++++++++++--- src/solace_ai_connector/solace_ai_connector.py | 5 +++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index ff3b4bc3..5ba3ae8c 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -11,6 +11,8 @@ class Monitoring: _instance = None _initialized = False + _ready = False + _live = False def __new__(cls, *args, **kwargs): if not cls._instance: @@ -59,6 +61,22 @@ def __init__(self, config: dict[str, Any] = None) -> None: initialize(**options) self._initialized = True + def set_readiness(self, ready: bool) -> None: + """ + Set the readiness status of the MetricCollector. + + :param ready: Readiness status + """ + self._ready = ready + + def set_liveness(self, live: bool) -> None: + """ + Set the liveness status of the MetricCollector. + + :param live: Liveness status + """ + self._live = live + def send_metric(self, metric_name: str, metric_value: Any) -> None: """ Send a metric to Datadog. 
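Because Monitoring.__new__ caches a single instance and __init__ returns early once _initialized is set, the connector can build the collector once with the application config (in SolaceAiConnector.__init__) while every component's monitoring thread later fetches the same object with no arguments. Below is a minimal sketch of that behaviour, assuming the package is importable as solace_ai_connector and the datadog client is installed; the monitoring keys mirror the ones read above, and the host, port and tag values are placeholders rather than shipped defaults:

    # Illustrative sketch only -- not part of the patch series.
    from solace_ai_connector.common.monitoring import Monitoring

    config = {
        "monitoring": {
            "enabled": True,
            "host": "localhost",   # placeholder StatsD/Datadog agent host
            "port": 8125,          # placeholder StatsD port
            "tags": ["env:dev"],   # forwarded as statsd_constant_tags
        }
    }

    first = Monitoring(config)    # first call initializes the Datadog statsd client
    second = Monitoring()         # same object; __init__ exits early via _initialized
    assert first is second

    second.set_readiness(True)                    # readiness flag added in this patch
    second.send_metric("connector.heartbeat", 1)  # statsd.gauge(...) only when enabled

Note that a later patch in this series ("feat: update monitoring") replaces send_metric with a collect_metrics/get_collected_metrics pair and drops the direct Datadog dependency, so the gauge call above reflects the state of the code as of this patch only.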
diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 57c115bf..f0202fe6 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -17,7 +17,6 @@ DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 -DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME = 10 class ComponentBase: @@ -55,6 +54,7 @@ def __init__(self, module_info, **kwargs): self.stop_thread_event = threading.Event() self.current_message = None self.current_message_has_been_discarded = False + self.event_message_repeat_sleep_time = 1 self.log_identifier = f"[{self.instance_name}.{self.flow_name}.{self.name}] " @@ -63,6 +63,13 @@ def __init__(self, module_info, **kwargs): self.setup_communications() self.setup_broker_request_response() + def grow_sleep_time(self): + if self.event_message_repeat_sleep_time < 60: + self.event_message_repeat_sleep_time *= 2 + + def reset_sleep_time(self): + self.event_message_repeat_sleep_time = 1 + def create_thread_and_run(self): self.thread = threading.Thread(target=self.run) self.thread.start() @@ -79,11 +86,14 @@ def run(self): event = self.get_next_event() if event is not None: self.process_event_with_tracing(event) + self.reset_sleep_time() except AssertionError as e: - time.sleep(DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME) + time.sleep(self.event_message_repeat_sleep_time) + self.grow_sleep_time() raise e except Exception as e: - time.sleep(DEFAULT_EVENT_MESSAGE_RETRY_SLEEP_TIME) + time.sleep(self.event_message_repeat_sleep_time) + self.grow_sleep_time() self.handle_component_error(e, event) self.stop_component() diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 80560cfe..3f0b7c18 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -46,6 +46,8 @@ def run(self): if on_flow_creation: on_flow_creation(self.flows) + self.monitoring.set_readiness(True) + log.info("Solace AI Event Connector started successfully") except Exception as e: log.error("Error during Solace AI Event Connector startup: %s", str(e)) @@ -221,3 +223,6 @@ def stop(self): self.cache_service.stop() # Stop the cache service if self.trace_thread: self.trace_thread.join() + + self.monitoring.set_liveness(False) + self.monitoring.set_readiness(False) From 99650332df71a6af4f126cce2ff0ef46387e9b3a Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 2 Dec 2024 14:59:39 -0500 Subject: [PATCH 06/54] fix: handle sleep error --- src/solace_ai_connector/components/component_base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index f0202fe6..a57f472d 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -88,11 +88,17 @@ def run(self): self.process_event_with_tracing(event) self.reset_sleep_time() except AssertionError as e: - time.sleep(self.event_message_repeat_sleep_time) + try: + time.sleep(self.event_message_repeat_sleep_time) + except KeyboardInterrupt: + raise e self.grow_sleep_time() raise e except Exception as e: - time.sleep(self.event_message_repeat_sleep_time) + try: + time.sleep(self.event_message_repeat_sleep_time) + except KeyboardInterrupt: + raise e self.grow_sleep_time() self.handle_component_error(e, event) From 6c5362bc3ae3827ce1ba6d9b7b786367c5897786 
Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 2 Dec 2024 15:09:36 -0500 Subject: [PATCH 07/54] fix: handle sleep error --- src/solace_ai_connector/components/component_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index a57f472d..e2a2e5ae 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -86,7 +86,7 @@ def run(self): event = self.get_next_event() if event is not None: self.process_event_with_tracing(event) - self.reset_sleep_time() + self.reset_sleep_time() except AssertionError as e: try: time.sleep(self.event_message_repeat_sleep_time) From 3dbba60b710f510778de61860d1c09bcd8d30f74 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 2 Dec 2024 23:05:12 -0500 Subject: [PATCH 08/54] feat: gracefully exit --- src/solace_ai_connector/common/log.py | 74 +++++++++++++++++-- src/solace_ai_connector/common/utils.py | 4 +- .../components/component_base.py | 6 +- .../general/llm/litellm/litellm_base.py | 2 +- src/solace_ai_connector/main.py | 9 ++- .../solace_ai_connector.py | 32 +++++++- 6 files changed, 111 insertions(+), 16 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index ac151513..4d0dc815 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -2,6 +2,8 @@ import logging import logging.handlers import json +import os +from datetime import datetime log = logging.getLogger("solace_ai_connector") @@ -35,7 +37,43 @@ def format(self, record): return json.dumps(log_record) -def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): +def convert_to_bytes(size_str): + size_str = size_str.upper() + size_units = {"KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4, "B": 1} + for unit in size_units: + if size_str.endswith(unit): + return int(size_str[: -len(unit)]) * size_units[unit] + return int(size_str) + + +def check_total_size(total_size_cap): + total_size = sum( + os.path.getsize(handler.baseFilename) + for handler in log.handlers + if isinstance(handler, logging.handlers.RotatingFileHandler) + ) + if total_size > total_size_cap: + return False + return True + + +def archive_log_file(logFilePath, log_file_name, max_file_size): + if os.path.getsize(logFilePath) > max_file_size: + os.rename(logFilePath, log_file_name) + with open(logFilePath, "w") as file: + file.write("") + + +def setup_log( + logFilePath, + stdOutLogLevel, + fileLogLevel, + logFormat, + file_name_pattern, + max_file_size, + max_history, + total_size_cap, +): """ Set up the configuration for the logger. @@ -44,8 +82,15 @@ def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): stdOutLogLevel (int): Logging level for standard output. fileLogLevel (int): Logging level for the log file. logFormat (str): Format of the log output ('jsonl' or 'pipe-delimited'). - + file_name_pattern (str): Pattern for the log file names. + max_file_size (str): Maximum size of a log file before rolling over (e.g., '10MB'). + max_history (int): Maximum number of backup files to keep. + total_size_cap (str): Maximum total size of all log files (e.g., '1GB'). 
""" + # Convert size strings to bytes + max_file_size = convert_to_bytes(max_file_size) + total_size_cap = convert_to_bytes(total_size_cap) + # Set the global logger level to the lowest of the two levels log.setLevel(min(stdOutLogLevel, fileLogLevel)) @@ -58,9 +103,18 @@ def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): with open(logFilePath, "w") as file: file.write("") - # file_handler = logging.handlers.TimedRotatingFileHandler( - # filename=logFilePath, when='midnight', backupCount=30, mode='w') - file_handler = logging.FileHandler(filename=logFilePath, mode="a") + # Generate the log file name using the pattern + log_file_name = file_name_pattern.replace("${LOG_FILE}", logFilePath) + log_file_name = log_file_name.replace( + "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") + ) + log_file_name = log_file_name.replace("%i", "0") # Initial index for the log file + + file_handler = logging.handlers.RotatingFileHandler( + filename=logFilePath, + backupCount=max_history, + maxBytes=max_file_size, + ) if logFormat == "jsonl": file_formatter = JsonlFormatter() else: @@ -70,3 +124,13 @@ def setup_log(logFilePath, stdOutLogLevel, fileLogLevel, logFormat): log.addHandler(file_handler) log.addHandler(stream_handler) + + # Ensure total size cap is not exceeded + if total_size_cap > 0: + log.addFilter(lambda record: check_total_size(total_size_cap)) + + # Archive the log file when it exceeds maxBytes + log.addFilter( + lambda record: archive_log_file(logFilePath, log_file_name, max_file_size) + or True + ) diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index aaf9ee4f..b4a5a46a 100755 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -73,7 +73,7 @@ def get_subdirectories(path=None): def resolve_config_values(config, allow_source_expression=False): """Resolve any config module values in the config by processing 'invoke' entries""" - log.debug("Resolving config values in %s", config) + # log.debug("Resolving config values in %s", config) if not isinstance(config, (dict, list)): return config if isinstance(config, list): @@ -381,7 +381,7 @@ def decode_payload(payload, encoding, payload_format): ): payload = payload.decode("utf-8") elif encoding == "unicode_escape": - payload = payload.decode('unicode_escape') + payload = payload.decode("unicode_escape") if payload_format == "json": payload = json.loads(payload) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index e2a2e5ae..9456aa71 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -91,14 +91,14 @@ def run(self): try: time.sleep(self.event_message_repeat_sleep_time) except KeyboardInterrupt: - raise e + self.handle_component_error(e, event) self.grow_sleep_time() - raise e + self.handle_component_error(e, event) except Exception as e: try: time.sleep(self.event_message_repeat_sleep_time) except KeyboardInterrupt: - raise e + self.handle_component_error(e, event) self.grow_sleep_time() self.handle_component_error(e, event) diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index 22bb2e02..6f1fdc62 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -65,7 +65,7 @@ def 
init_load_balancer(self): """initialize a load balancer""" try: self.router = litellm.Router(model_list=self.load_balancer) - log.debug("Load balancer initialized with models: %s", self.load_balancer) + log.debug("Litellm Load balancer was initialized") except Exception as e: raise ValueError(f"Error initializing load balancer: {e}") diff --git a/src/solace_ai_connector/main.py b/src/solace_ai_connector/main.py index fe2ccf62..f8922a3e 100644 --- a/src/solace_ai_connector/main.py +++ b/src/solace_ai_connector/main.py @@ -2,7 +2,7 @@ import sys import re import yaml -import atexit +import signal from .solace_ai_connector import SolaceAiConnector @@ -108,10 +108,12 @@ def shutdown(): """Shutdown the application.""" print("Stopping Solace AI Connector") app.stop() - app.cleanup() + app.cleanup() print("Solace AI Connector exited successfully!") os._exit(0) - atexit.register(shutdown) + + signal.signal(signal.SIGINT, lambda s, f: shutdown()) + signal.signal(signal.SIGTERM, lambda s, f: shutdown()) # Start the application app.run() @@ -121,6 +123,7 @@ def shutdown(): except KeyboardInterrupt: shutdown() + if __name__ == "__main__": # Read in the configuration yaml filenames from the args diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 3f0b7c18..cc1d092a 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -121,12 +121,40 @@ def cleanup(self): def setup_logging(self): """Setup logging""" + # Default rolling values + file_name_pattern = "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" + max_file_size = "100MB" + max_history = 7 + total_size_cap = "1GB" + log_config = self.config.get("log", {}) stdout_log_level = log_config.get("stdout_log_level", "INFO") - log_file_level = log_config.get("log_file_level", "DEBUG") + log_file_level = log_config.get("log_file_level", "INFO") log_file = log_config.get("log_file", "solace_ai_connector.log") log_format = log_config.get("log_format", "pipe-delimited") - setup_log(log_file, stdout_log_level, log_file_level, log_format) + + # Get logback values + logback = log_config.get("logback", {}) + if logback: + rollingpolicy = logback.get("rollingpolicy", {}) + if rollingpolicy: + file_name_pattern = rollingpolicy.get( + "file_name_pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" + ) + max_file_size = rollingpolicy.get("max-file-size", "100MB") + max_history = rollingpolicy.get("max-history", 7) + total_size_cap = rollingpolicy.get("total-size-cap", "1GB") + + setup_log( + log_file, + stdout_log_level, + log_file_level, + log_format, + file_name_pattern, + max_file_size, + max_history, + total_size_cap, + ) def setup_trace(self): """Setup trace""" From a62d4cff918d34c760a2d110d34251623a5ef4c2 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 3 Dec 2024 10:55:00 -0500 Subject: [PATCH 09/54] feat: set the log back --- src/solace_ai_connector/common/log.py | 90 ++++++++++++------- .../components/component_base.py | 2 +- .../components/inputs_outputs/broker_base.py | 1 - .../solace_ai_connector.py | 19 +--- 4 files changed, 62 insertions(+), 50 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index 4d0dc815..55ad6b9e 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -69,10 +69,7 @@ def setup_log( stdOutLogLevel, fileLogLevel, logFormat, - file_name_pattern, - max_file_size, - max_history, - total_size_cap, + logBack, ): """ Set up the configuration for 
the logger. @@ -82,14 +79,8 @@ def setup_log( stdOutLogLevel (int): Logging level for standard output. fileLogLevel (int): Logging level for the log file. logFormat (str): Format of the log output ('jsonl' or 'pipe-delimited'). - file_name_pattern (str): Pattern for the log file names. - max_file_size (str): Maximum size of a log file before rolling over (e.g., '10MB'). - max_history (int): Maximum number of backup files to keep. - total_size_cap (str): Maximum total size of all log files (e.g., '1GB'). + logBack (dict): Rolling log file configuration. """ - # Convert size strings to bytes - max_file_size = convert_to_bytes(max_file_size) - total_size_cap = convert_to_bytes(total_size_cap) # Set the global logger level to the lowest of the two levels log.setLevel(min(stdOutLogLevel, fileLogLevel)) @@ -103,34 +94,73 @@ def setup_log( with open(logFilePath, "w") as file: file.write("") - # Generate the log file name using the pattern - log_file_name = file_name_pattern.replace("${LOG_FILE}", logFilePath) - log_file_name = log_file_name.replace( - "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") - ) - log_file_name = log_file_name.replace("%i", "0") # Initial index for the log file + file_handler = logging.FileHandler(filename=logFilePath, mode="a") + if logBack: + rollingpolicy = logBack.get("rollingpolicy", {}) + if rollingpolicy: + if "file_name_pattern" not in rollingpolicy: + log.warning( + "file_name_pattern is required in rollingpolicy. Continuing with default value '${LOG_FILE}.%d{yyyy-MM-dd}.%i'." + ) + file_name_pattern = rollingpolicy.get( + "file_name_pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i" + ) + + if "max-file-size" not in rollingpolicy: + log.warning( + "max-file-size is required in rollingpolicy. Continuing with default value '1GB'." + ) + max_file_size = rollingpolicy.get("max-file-size", "1GB") + + if "max-history" not in rollingpolicy: + log.warning( + "max-history is required in rollingpolicy. Continuing with default value '7'." + ) + max_history = rollingpolicy.get("max-history", 7) + + if "total-size-cap" not in rollingpolicy: + log.warning( + "total-size-cap is required in rollingpolicy. Continuing with default value '1TB'." 
+ ) + total_size_cap = rollingpolicy.get("total-size-cap", "1TB") + + # Convert size strings to bytes + max_file_size = convert_to_bytes(max_file_size) + total_size_cap = convert_to_bytes(total_size_cap) + + # Generate the log file name using the pattern + log_file_name = file_name_pattern.replace("${LOG_FILE}", logFilePath) + log_file_name = log_file_name.replace( + "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") + ) + log_file_name = log_file_name.replace( + "%i", "0" + ) # Initial index for the log file + + # Overwrite the file handler with a rotating file handler + file_handler = logging.handlers.RotatingFileHandler( + filename=logFilePath, + backupCount=max_history, + maxBytes=max_file_size, + ) - file_handler = logging.handlers.RotatingFileHandler( - filename=logFilePath, - backupCount=max_history, - maxBytes=max_file_size, - ) if logFormat == "jsonl": file_formatter = JsonlFormatter() else: file_formatter = logging.Formatter("%(asctime)s | %(levelname)s: %(message)s") + file_handler.setFormatter(file_formatter) file_handler.setLevel(fileLogLevel) log.addHandler(file_handler) log.addHandler(stream_handler) - # Ensure total size cap is not exceeded - if total_size_cap > 0: - log.addFilter(lambda record: check_total_size(total_size_cap)) + # # Ensure total size cap is not exceeded + # if total_size_cap > 0: + # log.addFilter(lambda record: check_total_size(total_size_cap)) - # Archive the log file when it exceeds maxBytes - log.addFilter( - lambda record: archive_log_file(logFilePath, log_file_name, max_file_size) - or True - ) + # # Archive the log file when it exceeds maxBytes + # log.addFilter( + # lambda record: archive_log_file(logFilePath, log_file_name, max_file_size) + # or True + # ) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 9456aa71..e5704e53 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -495,6 +495,6 @@ def run_micro_monitoring(self) -> None: for metric_name, metric_value in metrics.items(): monitoring.send_metric(metric_name, metric_value) log.debug("Sent metric %s: %s", metric_name, metric_value) - time.sleep(10) + time.sleep(60) except KeyboardInterrupt: log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 39b150c3..f9cf10c1 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -115,5 +115,4 @@ def get_metrics(self): metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() str_metrics = str(metrics) stats_dict = json.loads(str_metrics) - log.debug(f"API metrics: {stats_dict}\n") return stats_dict diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index cc1d092a..ad059bdb 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -121,11 +121,6 @@ def cleanup(self): def setup_logging(self): """Setup logging""" - # Default rolling values - file_name_pattern = "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" - max_file_size = "100MB" - max_history = 7 - total_size_cap = "1GB" log_config = self.config.get("log", {}) stdout_log_level = log_config.get("stdout_log_level", "INFO") @@ -135,25 +130,13 @@ def setup_logging(self): # Get logback values logback = 
log_config.get("logback", {}) - if logback: - rollingpolicy = logback.get("rollingpolicy", {}) - if rollingpolicy: - file_name_pattern = rollingpolicy.get( - "file_name_pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" - ) - max_file_size = rollingpolicy.get("max-file-size", "100MB") - max_history = rollingpolicy.get("max-history", 7) - total_size_cap = rollingpolicy.get("total-size-cap", "1GB") setup_log( log_file, stdout_log_level, log_file_level, log_format, - file_name_pattern, - max_file_size, - max_history, - total_size_cap, + logback, ) def setup_trace(self): From 116068a856b227bd7974a95de4fc37d2ba2ab218 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 3 Dec 2024 12:05:36 -0500 Subject: [PATCH 10/54] fix: rename log fields --- src/solace_ai_connector/common/log.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index 55ad6b9e..f212f46c 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -98,12 +98,12 @@ def setup_log( if logBack: rollingpolicy = logBack.get("rollingpolicy", {}) if rollingpolicy: - if "file_name_pattern" not in rollingpolicy: + if "file-name-pattern" not in rollingpolicy: log.warning( - "file_name_pattern is required in rollingpolicy. Continuing with default value '${LOG_FILE}.%d{yyyy-MM-dd}.%i'." + "file-name-pattern is required in rollingpolicy. Continuing with default value '${LOG_FILE}.%d{yyyy-MM-dd}.%i'." ) file_name_pattern = rollingpolicy.get( - "file_name_pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i" + "file-name-pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i" ) if "max-file-size" not in rollingpolicy: From 4c80e41508897d66bf1add43429e8ae6343e1515 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 3 Dec 2024 12:06:13 -0500 Subject: [PATCH 11/54] fix: disabled monitoring --- src/solace_ai_connector/components/component_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index e5704e53..1a7b1115 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -493,8 +493,9 @@ def run_micro_monitoring(self) -> None: # Collect and send metrics every 10 seconds metrics = self.get_metrics() for metric_name, metric_value in metrics.items(): - monitoring.send_metric(metric_name, metric_value) - log.debug("Sent metric %s: %s", metric_name, metric_value) + pass + # monitoring.send_metric(metric_name, metric_value) + # log.debug("Sent metric %s: %s", metric_name, metric_value) time.sleep(60) except KeyboardInterrupt: log.info("Monitoring stopped.") From 98b86996eed6903ef3752c24441c2dbd704a51c0 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 3 Dec 2024 16:59:05 -0500 Subject: [PATCH 12/54] fix: resolve log naming --- src/solace_ai_connector/common/log.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index f212f46c..0fbd74b2 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -100,10 +100,10 @@ def setup_log( if rollingpolicy: if "file-name-pattern" not in rollingpolicy: log.warning( - "file-name-pattern is required in rollingpolicy. Continuing with default value '${LOG_FILE}.%d{yyyy-MM-dd}.%i'." + "file-name-pattern is required in rollingpolicy. 
Continuing with default value '{LOG_FILE}.%d{yyyy-MM-dd}.%i'." ) file_name_pattern = rollingpolicy.get( - "file-name-pattern", "${LOG_FILE}.%d{yyyy-MM-dd}.%i" + "file-name-pattern", "{LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" ) if "max-file-size" not in rollingpolicy: @@ -129,7 +129,7 @@ def setup_log( total_size_cap = convert_to_bytes(total_size_cap) # Generate the log file name using the pattern - log_file_name = file_name_pattern.replace("${LOG_FILE}", logFilePath) + log_file_name = file_name_pattern.replace("{LOG_FILE}", logFilePath) log_file_name = log_file_name.replace( "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") ) @@ -139,7 +139,7 @@ def setup_log( # Overwrite the file handler with a rotating file handler file_handler = logging.handlers.RotatingFileHandler( - filename=logFilePath, + filename=log_file_name, backupCount=max_history, maxBytes=max_file_size, ) From 796fd67e3a156c7f6389cfe7c35932a8b10ad210 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Wed, 4 Dec 2024 12:42:44 -0500 Subject: [PATCH 13/54] fix: resolved logging issues --- src/solace_ai_connector/common/log.py | 53 ++++++--------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index 0fbd74b2..99ccd028 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -46,24 +46,6 @@ def convert_to_bytes(size_str): return int(size_str) -def check_total_size(total_size_cap): - total_size = sum( - os.path.getsize(handler.baseFilename) - for handler in log.handlers - if isinstance(handler, logging.handlers.RotatingFileHandler) - ) - if total_size > total_size_cap: - return False - return True - - -def archive_log_file(logFilePath, log_file_name, max_file_size): - if os.path.getsize(logFilePath) > max_file_size: - os.rename(logFilePath, log_file_name) - with open(logFilePath, "w") as file: - file.write("") - - def setup_log( logFilePath, stdOutLogLevel, @@ -90,11 +72,11 @@ def setup_log( stream_formatter = logging.Formatter("%(message)s") stream_handler.setFormatter(stream_formatter) - # Create an empty file at logFilePath (this will overwrite any existing content) - with open(logFilePath, "w") as file: - file.write("") + if logFormat == "jsonl": + file_formatter = JsonlFormatter() + else: + file_formatter = logging.Formatter("%(asctime)s | %(levelname)s: %(message)s") - file_handler = logging.FileHandler(filename=logFilePath, mode="a") if logBack: rollingpolicy = logBack.get("rollingpolicy", {}) if rollingpolicy: @@ -129,13 +111,13 @@ def setup_log( total_size_cap = convert_to_bytes(total_size_cap) # Generate the log file name using the pattern - log_file_name = file_name_pattern.replace("{LOG_FILE}", logFilePath) - log_file_name = log_file_name.replace( - "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") + log_file_name = ( + file_name_pattern.replace( + "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") + ) + .replace("%i", "0") + .replace("${LOG_FILE}", logFilePath) ) - log_file_name = log_file_name.replace( - "%i", "0" - ) # Initial index for the log file # Overwrite the file handler with a rotating file handler file_handler = logging.handlers.RotatingFileHandler( @@ -143,24 +125,11 @@ def setup_log( backupCount=max_history, maxBytes=max_file_size, ) - - if logFormat == "jsonl": - file_formatter = JsonlFormatter() else: - file_formatter = logging.Formatter("%(asctime)s | %(levelname)s: %(message)s") + file_handler = logging.FileHandler(filename=logFilePath, mode="a") 
file_handler.setFormatter(file_formatter) file_handler.setLevel(fileLogLevel) log.addHandler(file_handler) log.addHandler(stream_handler) - - # # Ensure total size cap is not exceeded - # if total_size_cap > 0: - # log.addFilter(lambda record: check_total_size(total_size_cap)) - - # # Archive the log file when it exceeds maxBytes - # log.addFilter( - # lambda record: archive_log_file(logFilePath, log_file_name, max_file_size) - # or True - # ) From cbbbd7c0200361aa564a17ccb72784a093ecf7c6 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 5 Dec 2024 09:05:43 -0500 Subject: [PATCH 14/54] fix: resolve log --- src/solace_ai_connector/common/log.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index 99ccd028..b6a1489f 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -111,13 +111,7 @@ def setup_log( total_size_cap = convert_to_bytes(total_size_cap) # Generate the log file name using the pattern - log_file_name = ( - file_name_pattern.replace( - "%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d") - ) - .replace("%i", "0") - .replace("${LOG_FILE}", logFilePath) - ) + log_file_name = logFilePath # Overwrite the file handler with a rotating file handler file_handler = logging.handlers.RotatingFileHandler( @@ -125,6 +119,11 @@ def setup_log( backupCount=max_history, maxBytes=max_file_size, ) + file_handler.namer = ( + lambda name: file_name_pattern.replace("{LOG_FILE}", logFilePath) + .replace("%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d")) + .replace("%i", str(name.split(".")[-1])) + ) else: file_handler = logging.FileHandler(filename=logFilePath, mode="a") From 37e31442714f3b3c856044cac55df63cbcc078ac Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 5 Dec 2024 09:25:28 -0500 Subject: [PATCH 15/54] fix: resolve log --- src/solace_ai_connector/common/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/log.py b/src/solace_ai_connector/common/log.py index b6a1489f..9472b52c 100644 --- a/src/solace_ai_connector/common/log.py +++ b/src/solace_ai_connector/common/log.py @@ -120,7 +120,7 @@ def setup_log( maxBytes=max_file_size, ) file_handler.namer = ( - lambda name: file_name_pattern.replace("{LOG_FILE}", logFilePath) + lambda name: file_name_pattern.replace("${LOG_FILE}", logFilePath) .replace("%d{yyyy-MM-dd}", datetime.now().strftime("%Y-%m-%d")) .replace("%i", str(name.split(".")[-1])) ) From 52b5233bb7f79c5596ac11a5a6d28dc7ca7506a3 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 5 Dec 2024 17:13:49 -0500 Subject: [PATCH 16/54] feat: remove dependency to Langchain --- examples/llm/anthropic_chat.yaml | 2 +- examples/llm/bedrock_anthropic_chat.yaml | 2 +- .../langchain_openai_with_history_chat.yaml | 2 +- examples/llm/mixture_of_agents.yaml | 2 +- examples/llm/openai_chat.yaml | 2 +- examples/llm/openai_chroma_rag.yaml | 2 +- .../openai_component_request_response.yaml | 2 +- examples/llm/vertexai_chat.yaml | 2 +- pyproject.toml | 6 +-- requirements.txt | 7 ++-- .../components/__init__.py | 38 +------------------ 11 files changed, 15 insertions(+), 52 deletions(-) diff --git a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml index cc6e8fd8..fb58a585 100644 --- a/examples/llm/anthropic_chat.yaml +++ b/examples/llm/anthropic_chat.yaml @@ -12,7 +12,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip 
install -U langchain-anthropic +# pip install -U langchain-anthropic langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - ANTHROPIC_API_KEY diff --git a/examples/llm/bedrock_anthropic_chat.yaml b/examples/llm/bedrock_anthropic_chat.yaml index 421ce428..c6e69f9d 100644 --- a/examples/llm/bedrock_anthropic_chat.yaml +++ b/examples/llm/bedrock_anthropic_chat.yaml @@ -11,7 +11,7 @@ # } # # Dependencies: -# pip install langchain_aws +# pip install langchain_aws langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - AWS_BEDROCK_ANTHROPIC_CLAUDE_MODEL_ID diff --git a/examples/llm/langchain_openai_with_history_chat.yaml b/examples/llm/langchain_openai_with_history_chat.yaml index bef45afd..5eb672f2 100755 --- a/examples/llm/langchain_openai_with_history_chat.yaml +++ b/examples/llm/langchain_openai_with_history_chat.yaml @@ -12,7 +12,7 @@ # It will then send an event back to Solace with the topic: `demo/joke/subject/response` # # Dependencies: -# pip install -U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/mixture_of_agents.yaml b/examples/llm/mixture_of_agents.yaml index dd72e93e..cc92ee30 100644 --- a/examples/llm/mixture_of_agents.yaml +++ b/examples/llm/mixture_of_agents.yaml @@ -11,7 +11,7 @@ # NOTE: For horizontal scaling, partitioned queues must be used. This is not implemented in this example. # # Dependencies: -# pip install -U langchain-google-vertexai langchain_anthropic langchain_openai openai +# pip install -U langchain-google-vertexai langchain_anthropic langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 54db782f..770fdedb 100755 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -11,7 +11,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/openai_chroma_rag.yaml b/examples/llm/openai_chroma_rag.yaml index f78bfc09..6c94a40a 100644 --- a/examples/llm/openai_chroma_rag.yaml +++ b/examples/llm/openai_chroma_rag.yaml @@ -18,7 +18,7 @@ # The response will be sent to Solace topic `demo/rag/query/response` # # Dependencies: -# pip install -U langchain_openai openai chromadb langchain-chroma +# pip install -U langchain_openai openai chromadb langchain-chroma langchain-core~=0.3.0 langchain~=0.3.0 # # Required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/openai_component_request_response.yaml b/examples/llm/openai_component_request_response.yaml index f00ec8e3..1cb83d68 100644 --- a/examples/llm/openai_component_request_response.yaml +++ b/examples/llm/openai_component_request_response.yaml @@ -28,7 +28,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain_openai openai +# pip install -U langchain_openai openai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - OPENAI_API_KEY diff --git a/examples/llm/vertexai_chat.yaml b/examples/llm/vertexai_chat.yaml index 19e77ece..c61578a0 100644 --- a/examples/llm/vertexai_chat.yaml +++ 
b/examples/llm/vertexai_chat.yaml @@ -11,7 +11,7 @@ # It will then send an event back to Solace with the topic: `demo/question/response` # # Dependencies: -# pip install -U langchain-google-vertexai +# pip install -U langchain-google-vertexai langchain-core~=0.3.0 langchain~=0.3.0 # # required ENV variables: # - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file diff --git a/pyproject.toml b/pyproject.toml index e24e4284..2958e8dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,15 +19,13 @@ classifiers = [ ] dependencies = [ "boto3~=1.34.122", - "langchain-core~=0.3.0", - "langchain~=0.3.0", "PyYAML~=6.0.1", "Requests~=2.32.3", "solace_pubsubplus>=1.8.0", - "litellm~=1.51.3", "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", + "datadog~=0.50.2", ] [project.urls] @@ -43,7 +41,7 @@ solace-ai-connector-gen-docs = "solace_ai_connector.tools.gen_component_docs:mai [tool.hatch.envs.hatch-test] installer = "pip" -# # Specify minimum and maximum Python versions to test +# Specify minimum and maximum Python versions to test [[tool.hatch.envs.hatch-test.matrix]] python = ["3.10", "3.12"] diff --git a/requirements.txt b/requirements.txt index c7e90475..58c44a0e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ boto3~=1.34.122 -langchain-core~=0.3.0 -langchain~=0.3.0 PyYAML~=6.0.1 Requests~=2.32.3 solace_pubsubplus~=1.8.0 -litellm~=1.51.3 Flask~=3.0.3 Flask-SocketIO~=5.4.1 -build~=1.2.2.post1 \ No newline at end of file +build~=1.2.2.post1 +datadog~=0.50.2 +SQLAlchemy~=2.0.36 \ No newline at end of file diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py index d20da981..bfc358bc 100755 --- a/src/solace_ai_connector/components/__init__.py +++ b/src/solace_ai_connector/components/__init__.py @@ -16,7 +16,6 @@ delay, iterate, message_filter, - parser, ) from .general.for_testing import ( @@ -25,26 +24,7 @@ give_ack_output, ) -from .general.llm.langchain import ( - langchain_embeddings, - langchain_vector_store_delete, - langchain_chat_model, - langchain_chat_model_with_history, - langchain_vector_store_embedding_index, - langchain_vector_store_embedding_search, -) - -from .general.llm.litellm import ( - litellm_chat_model, - litellm_embeddings, - litellm_chat_model_with_history, -) - -from .general.websearch import ( - websearch_duckduckgo, - websearch_google, - websearch_bing -) +from .general.websearch import (websearch_duckduckgo, websearch_google, websearch_bing) # Also import the components from the submodules from .inputs_outputs.error_input import ErrorInput @@ -62,20 +42,6 @@ from .general.delay import Delay from .general.iterate import Iterate from .general.message_filter import MessageFilter -from .general.parser import Parser -from .general.llm.langchain.langchain_base import LangChainBase -from .general.llm.langchain.langchain_embeddings import LangChainEmbeddings -from .general.llm.langchain.langchain_vector_store_delete import LangChainVectorStoreDelete -from .general.llm.langchain.langchain_chat_model import LangChainChatModel -from .general.llm.langchain.langchain_chat_model_with_history import ( - LangChainChatModelWithHistory, -) -from .general.llm.langchain.langchain_vector_store_embedding_index import ( - LangChainVectorStoreEmbeddingsIndex, -) -from .general.llm.langchain.langchain_vector_store_embedding_search import ( - LangChainVectorStoreEmbeddingsSearch, -) from .general.websearch.websearch_duckduckgo import WebSearchDuckDuckGo from 
.general.websearch.websearch_google import WebSearchGoogle -from .general.websearch.websearch_bing import WebSearchBing \ No newline at end of file +from .general.websearch.websearch_bing import WebSearchBing From ad742f7c219c86a4d357c06a9835abfb173ab0c2 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 6 Dec 2024 16:38:41 -0500 Subject: [PATCH 17/54] feat: update monitoring --- examples/llm/litellm_chat.yaml | 11 ++- src/solace_ai_connector/common/monitoring.py | 74 +++++++++---------- .../components/component_base.py | 42 +++++++++-- .../components/inputs_outputs/broker_base.py | 8 +- 4 files changed, 82 insertions(+), 53 deletions(-) diff --git a/examples/llm/litellm_chat.yaml b/examples/llm/litellm_chat.yaml index 83ba283a..918c6339 100644 --- a/examples/llm/litellm_chat.yaml +++ b/examples/llm/litellm_chat.yaml @@ -33,9 +33,16 @@ --- log: - stdout_log_level: INFO + stdout_log_level: DEBUG log_file_level: DEBUG - log_file: solace_ai_connector.log + log_file: ${LOG_FILE} + log_format: jsonl + logback: + rollingpolicy: + file-name-pattern: "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" + max-file-size: 600B + max-history: 2 + total-size-cap: 21KB #1GB shared_config: - broker_config: &broker_connection diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 5ba3ae8c..08a56f27 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -1,18 +1,16 @@ -from typing import Any -from datadog import initialize, statsd - -from .log import log +from typing import Any, List class Monitoring: """ - A singleton class to collect and send metrics to Datadog. + A singleton class to collect and send metrics. """ _instance = None _initialized = False _ready = False _live = False + _interval = 60 def __new__(cls, *args, **kwargs): if not cls._instance: @@ -29,37 +27,8 @@ def __init__(self, config: dict[str, Any] = None) -> None: if self._initialized: return - self.enabled = False - - monitoring = config.get("monitoring", {}) - if monitoring is not {}: - self.enabled = monitoring.get("enabled", False) - tags = monitoring.get("tags", []) - if "host" not in monitoring: - log.error( - "Monitoring configuration is missing host. Disabling monitoring." - ) - self.enabled = False - else: - host = monitoring.get("host") - if "port" not in monitoring: - log.error( - "Monitoring configuration is missing port. Disabling monitoring." - ) - self.enabled = False - else: - port = monitoring.get("port") - - # Initialize Datadog with provided options - if self.enabled: - options = { - "statsd_constant_tags": tags, - "statsd_host": host, - "statsd_port": port, - } - - initialize(**options) self._initialized = True + self._collected_metrics = {} def set_readiness(self, ready: bool) -> None: """ @@ -77,12 +46,35 @@ def set_liveness(self, live: bool) -> None: """ self._live = live - def send_metric(self, metric_name: str, metric_value: Any) -> None: + def set_interval(self, interval: int) -> None: + """ + Set the interval for the MetricCollector. + + :param interval: Interval + """ + self._interval = interval + + def get_interval(self) -> int: + """ + Get the interval for the MetricCollector. + + :return: Interval + """ + return self._interval + + def collect_metrics(self, metrics: dict[dict[str, Any]]) -> None: + """ + Collect metrics. 
+ + :param metrics: List of metrics + """ + for key, value in metrics.items(): + self._collected_metrics[key] = value + + def get_collected_metrics(self) -> List[dict[str, Any]]: """ - Send a metric to Datadog. + Retrieve collected metrics. - :param metric_name: Name of the metric - :param metric_value: Value of the metric + :return: Dictionary of collected metrics """ - if self.enabled: - statsd.gauge(metric_name, metric_value) + return self._collected_metrics diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 1a7b1115..dc907588 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -480,22 +480,48 @@ def do_broker_request_response( f"Broker request response controller not found for component {self.name}" ) + def get_metrics_with_header(self) -> dict[dict[str, Any], Any]: + metrics = {} + + pure_metrics = self.get_metrics() + for metric, value in pure_metrics.items(): + key = tuple( + [ + ("flow", self.flow_name), + ("flow_index", self.index), + ("component", self.name), + ("component_index", self.component_index), + ("metric", metric), + ] + ) + + value = {"value": value, "timestamp": int(time.time())} + log.debug( + "Metrics - flow: %s, component: %s, metric: %s, value: %s", + self.flow_name, + self.name, + metric, + value, + ) + + metrics[key] = value + return metrics + def get_metrics(self) -> dict[str, Any]: return {} def run_micro_monitoring(self) -> None: """ - Start the metric collection and sending process in a loop. + Start the metric collection process in a loop. """ monitoring = Monitoring() try: while not self.stop_signal.is_set(): - # Collect and send metrics every 10 seconds - metrics = self.get_metrics() - for metric_name, metric_value in metrics.items(): - pass - # monitoring.send_metric(metric_name, metric_value) - # log.debug("Sent metric %s: %s", metric_name, metric_value) - time.sleep(60) + # Collect metrics + metrics = self.get_metrics_with_header() + monitoring.collect_metrics(metrics) + # Wait for the next interval + sleep_interval = monitoring.get_interval() + time.sleep(sleep_interval) except KeyboardInterrupt: log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index f9cf10c1..d4f67ea8 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -112,7 +112,11 @@ def generate_uuid(self): return str(uuid.uuid4()) def get_metrics(self): + required_metrics = ["SOLCLIENT_STATS_RX_ACKED"] + stats_dict = {} metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() - str_metrics = str(metrics) - stats_dict = json.loads(str_metrics) + for metric_key in required_metrics: + metric = Metric(metric_key) + stats_dict[metric_key] = metrics.get_value(Metric(metric)) + return stats_dict From d93e5de3eeebb9300554ade6e343574b25bf1fa2 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 9 Dec 2024 11:01:42 -0500 Subject: [PATCH 18/54] feat: drop error messages when the queue is full --- examples/error_handler.yaml | 3 +++ .../components/inputs_outputs/error_input.py | 27 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/examples/error_handler.yaml b/examples/error_handler.yaml index f2277949..8c4a97b4 100644 --- a/examples/error_handler.yaml +++ b/examples/error_handler.yaml 
@@ -10,6 +10,8 @@ # } # If value is not a number, the error will be caught, logged to file and send back to the Solace broker. # +# Subscribe to `ai_connector_error/*/*/*` to see the error messages. +# # required ENV variables: # - SOLACE_BROKER_URL # - SOLACE_BROKER_USERNAME @@ -38,6 +40,7 @@ flows: - component_name: error_input component_module: error_input component_config: + max_queue_depth: 100 - component_name: error_logger component_module: file_output input_transforms: diff --git a/src/solace_ai_connector/components/inputs_outputs/error_input.py b/src/solace_ai_connector/components/inputs_outputs/error_input.py index 62720134..2c71fcc2 100644 --- a/src/solace_ai_connector/components/inputs_outputs/error_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/error_input.py @@ -28,6 +28,15 @@ ), "default": None, }, + { + "name": "max_queue_depth", + "required": False, + "description": ( + "Maximum number of messages that can be queued in the input queue." + "If the queue is full, the new message is dropped." + ), + "default": 1000, + }, ], "output_schema": { "type": "object", @@ -100,9 +109,11 @@ class ErrorInput(ComponentBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.max_rate = self.get_config("max_rate") + self.max_queue_depth = self.get_config("max_queue_depth") self.error_count_in_last_second = 0 self.error_count_start_time = time.time() @@ -112,7 +123,10 @@ def __init__(self, **kwargs): self.error_queue = None def invoke(self, message, data): - if self.discard_message_due_to_input_rate(): + if ( + self.discard_message_due_to_input_rate() + or self.discard_message_due_to_full_queue() + ): return None return data @@ -135,5 +149,16 @@ def discard_message_due_to_input_rate(self): return True return False + def discard_message_due_to_full_queue(self): + if self.input_queue.qsize() < self.max_queue_depth: + return False + + log.warning( + "Discarding error message due to queue size. " + "Error queue reached max queue depth of %d.", + self.max_queue_depth, + ) + return True + def get_input_data(self, message): return message.get_data("input.payload") From dfe4219ae14c9a82176a693f970e0f7a13f67ec9 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 9 Dec 2024 18:40:48 -0500 Subject: [PATCH 19/54] feat: add a text splitter component --- examples/llm/openai_chroma_rag.yaml | 38 +++++++- .../llm/langchain/langchain_split_text.py | 90 +++++++++++++++++++ 2 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py diff --git a/examples/llm/openai_chroma_rag.yaml b/examples/llm/openai_chroma_rag.yaml index f78bfc09..57c29d55 100644 --- a/examples/llm/openai_chroma_rag.yaml +++ b/examples/llm/openai_chroma_rag.yaml @@ -7,7 +7,7 @@ # Load Data: # Send data to Solace topic `demo/rag/data` with the following payload format: # { -# "texts": [. , ...] 
+# "text": text # } # # RAG Query: @@ -61,6 +61,22 @@ flows: payload_encoding: utf-8 payload_format: json + # Split text + - component_name: text_splitter + component_module: langchain_split_text + component_config: + langchain_module: langchain_text_splitters + langchain_class: TokenTextSplitter + langchain_component_config: + chunk_size: 10 + chunk_overlap: 1 + input_transforms: + - type: copy + source_expression: input.payload:text + dest_expression: user_data.input:text + input_selection: + source_expression: user_data.input + # Embedding data & ChromaDB ingest - component_name: chroma_embed component_module: langchain_vector_store_embedding_index @@ -81,11 +97,29 @@ flows: source_value: topic:demo/rag/data dest_expression: user_data.vector_input:metadatas.source - type: copy - source_expression: input.payload:texts + source_expression: previous dest_expression: user_data.vector_input:texts input_selection: source_expression: user_data.vector_input + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:demo/rag/response + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output + # RAG Inference flow - name: OpenAI_RAG components: diff --git a/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py b/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py new file mode 100644 index 00000000..c5eac049 --- /dev/null +++ b/src/solace_ai_connector/components/general/llm/langchain/langchain_split_text.py @@ -0,0 +1,90 @@ +# This component splits a long text into smaller parts using the LangChain text splitter module + +from .....common.log import log + +from .langchain_base import ( + LangChainBase, +) + + +info = { + "class_name": "LangChainTextSplitter", + "description": "Split a long text into smaller parts using the LangChain text splitter module", + "config_parameters": [ + { + "name": "langchain_module", + "required": True, + "description": "The text split module - e.g. 'langchain_text_splitters'", + }, + { + "name": "langchain_class", + "required": True, + "description": "The text split class to use - e.g. TokenTextSplitter", + }, + { + "name": "langchain_component_config", + "required": True, + "description": "Model specific configuration for the text splitting. " + "See documentation for valid parameter names." + "https://python.langchain.com/docs/how_to/split_by_token/#nltk", + }, + ], + "input_schema": { + "type": "object", + "properties": { + "text": { + "type": "string", + }, + }, + "required": ["text"], + }, + "output_schema": { + "type": "array", + "items": { + "type": "string", + }, + "description": ("A list of the split text"), + }, +} + + +class LangChainTextSplitter(LangChainBase): + """ + A class to split a long text into smaller parts using the LangChain text splitter module. + + This class inherits from LangChainBase and utilizes the LangChain text splitter module + to divide a given text into smaller segments based on the specified configuration. + """ + + def __init__(self, **kwargs): + """ + Initialize the LangChainTextSplitter with the provided configuration. + + Args: + **kwargs: Arbitrary keyword arguments containing configuration parameters. 
+ """ + super().__init__(info, **kwargs) + + def invoke(self, message, data): + """ + Split the provided text into smaller parts using the LangChain text splitter module. + + Args: + message (Message): The message object containing metadata. + data (dict): A dictionary containing the input text to be split. + + Returns: + list: A list of strings representing the split text segments. + """ + if "text" not in data: + log.error("Text not provided in input data") + return [] + + try: + text = data.get("text") + texts = self.component.split_text(text) + log.debug(f"Split text: {texts}") + return texts + except Exception as e: + log.error(f"Error splitting text: {str(e)}") + return [] From 9648a94c5e68c29e3b9115301a4314687aec4b62 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 9 Dec 2024 18:49:43 -0500 Subject: [PATCH 20/54] feat: updated docs --- docs/components/broker_request_response.md | 8 ++++ docs/components/error_input.md | 2 + docs/components/index.md | 1 + docs/components/langchain_chat_model.md | 6 +++ .../langchain_chat_model_with_history.md | 6 +++ docs/components/langchain_split_text.md | 42 +++++++++++++++++++ docs/components/litellm_chat_model.md | 8 ++-- .../litellm_chat_model_with_history.md | 8 ++-- docs/components/litellm_embeddings.md | 6 +-- docs/components/openai_chat_model.md | 6 ++- .../openai_chat_model_with_history.md | 4 +- 11 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 docs/components/langchain_split_text.md diff --git a/docs/components/broker_request_response.md b/docs/components/broker_request_response.md index 30ee6167..a408e2e7 100644 --- a/docs/components/broker_request_response.md +++ b/docs/components/broker_request_response.md @@ -21,6 +21,10 @@ component_config: request_expiry_ms: streaming: streaming_complete_expression: + streaming: + streaming_complete_expression: + streaming: + streaming_complete_expression: ``` | Parameter | Required | Default | Description | @@ -38,6 +42,10 @@ component_config: | request_expiry_ms | False | 60000 | Expiry time for cached requests in milliseconds | | streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | | streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | +| streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | +| streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | +| streaming | False | | The response will arrive in multiple pieces. If True, the streaming_complete_expression must be set and will be used to determine when the last piece has arrived. | +| streaming_complete_expression | False | | The source expression to determine when the last piece of a streaming response has arrived. | ## Component Input Schema diff --git a/docs/components/error_input.md b/docs/components/error_input.md index de06a883..a4bb24cc 100644 --- a/docs/components/error_input.md +++ b/docs/components/error_input.md @@ -9,11 +9,13 @@ component_name: component_module: error_input component_config: max_rate: + max_queue_depth: ``` | Parameter | Required | Default | Description | | --- | --- | --- | --- | | max_rate | False | None | Maximum rate of errors to process per second. 
Any errors above this rate will be dropped. If not set, all errors will be processed. | +| max_queue_depth | False | 1000 | Maximum number of messages that can be queued in the input queue.If the queue is full, the new message is dropped. | diff --git a/docs/components/index.md b/docs/components/index.md index 1b3b8516..292ada2e 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -14,6 +14,7 @@ | [langchain_chat_model](langchain_chat_model.md) | Provide access to all the LangChain chat models via configuration | | [langchain_chat_model_with_history](langchain_chat_model_with_history.md) | A chat model based on LangChain that includes keeping per-session history of the conversation. Note that this component will only take the first system message and the first human message in the messages array. | | [langchain_embeddings](langchain_embeddings.md) | Provide access to all the LangChain Text Embeddings components via configuration | +| [langchain_split_text](langchain_split_text.md) | Split a long text into smaller parts using the LangChain text splitter module | | [langchain_vector_store_delete](langchain_vector_store_delete.md) | This component allows for entries in a LangChain Vector Store to be deleted. This is needed for the continued maintenance of the vector store. Due to the nature of langchain vector stores, you need to specify an embedding component even though it is not used in this component. | | [langchain_vector_store_embedding_index](langchain_vector_store_embedding_index.md) | Use LangChain Vector Stores to index text for later semantic searches. This will take text, run it through an embedding model and then store it in a vector database. | | [langchain_vector_store_embedding_search](langchain_vector_store_embedding_search.md) | Use LangChain Vector Stores to search a vector store with a semantic search. This will take text, run it through an embedding model with a query embedding and then find the closest matches in the store. | diff --git a/docs/components/langchain_chat_model.md b/docs/components/langchain_chat_model.md index b65dea59..fda44bf6 100644 --- a/docs/components/langchain_chat_model.md +++ b/docs/components/langchain_chat_model.md @@ -11,6 +11,9 @@ component_config: langchain_module: langchain_class: langchain_component_config: + llm_mode: + stream_to_flow: + stream_batch_size: llm_response_format: ``` @@ -19,6 +22,9 @@ component_config: | langchain_module | True | | The chat model module - e.g. 'langchain_openai.chat_models' | | langchain_class | True | | The chat model class to use - e.g. ChatOpenAI | | langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | +| llm_mode | False | | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. If set to 'text', the response will be returned as a string. 
| diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md index 8686061e..e7483225 100644 --- a/docs/components/langchain_chat_model_with_history.md +++ b/docs/components/langchain_chat_model_with_history.md @@ -11,6 +11,9 @@ component_config: langchain_module: langchain_class: langchain_component_config: + llm_mode: + stream_to_flow: + stream_batch_size: llm_response_format: history_max_turns: history_max_message_size: @@ -27,6 +30,9 @@ component_config: | langchain_module | True | | The chat model module - e.g. 'langchain_openai.chat_models' | | langchain_class | True | | The chat model class to use - e.g. ChatOpenAI | | langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | +| llm_mode | False | | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. If set to 'text', the response will be returned as a string. | | history_max_turns | False | 20 | The maximum number of turns to keep in the history. If not set, the history will be limited to 20 turns. | | history_max_message_size | False | 1000 | The maximum amount of characters to keep in a single message in the history. | diff --git a/docs/components/langchain_split_text.md b/docs/components/langchain_split_text.md new file mode 100644 index 00000000..aa313039 --- /dev/null +++ b/docs/components/langchain_split_text.md @@ -0,0 +1,42 @@ +# LangChainTextSplitter + +Split a long text into smaller parts using the LangChain text splitter module + +## Configuration Parameters + +```yaml +component_name: +component_module: langchain_split_text +component_config: + langchain_module: + langchain_class: + langchain_component_config: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| langchain_module | True | | The text split module - e.g. 'langchain_text_splitters' | +| langchain_class | True | | The text split class to use - e.g. TokenTextSplitter | +| langchain_component_config | True | | Model specific configuration for the text splitting. See documentation for valid parameter names.https://python.langchain.com/docs/how_to/split_by_token/#nltk | + + +## Component Input Schema + +``` +{ + text: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| text | True | | + + +## Component Output Schema + +``` +[ + , + ... 
+] +``` diff --git a/docs/components/litellm_chat_model.md b/docs/components/litellm_chat_model.md index acd19851..e617a772 100644 --- a/docs/components/litellm_chat_model.md +++ b/docs/components/litellm_chat_model.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -31,11 +31,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. | | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -57,6 +57,7 @@ component_config: }, ... ], + stream: , clear_history_but_keep_depth: } ``` @@ -65,6 +66,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response - overwrites llm_mode | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | diff --git a/docs/components/litellm_chat_model_with_history.md b/docs/components/litellm_chat_model_with_history.md index 29aa640c..67ca587b 100644 --- a/docs/components/litellm_chat_model_with_history.md +++ b/docs/components/litellm_chat_model_with_history.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -27,11 +27,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. 
| | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -49,6 +49,7 @@ component_config: }, ... ], + stream: , clear_history_but_keep_depth: } ``` @@ -57,6 +58,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response - overwrites llm_mode | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | diff --git a/docs/components/litellm_embeddings.md b/docs/components/litellm_embeddings.md index 83930083..4e3e739e 100644 --- a/docs/components/litellm_embeddings.md +++ b/docs/components/litellm_embeddings.md @@ -11,11 +11,11 @@ component_config: load_balancer: embedding_params: temperature: + set_response_uuid_in_user_properties: stream_to_flow: stream_to_next_component: llm_mode: stream_batch_size: - set_response_uuid_in_user_properties: history_max_turns: history_max_time: history_max_turns: @@ -31,11 +31,11 @@ component_config: | load_balancer | False | | Add a list of models to load balancer. | | embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory.find more models at https://docs.litellm.ai/docs/providersfind more parameters at https://docs.litellm.ai/docs/completion/input | | temperature | False | 0.7 | Sampling temperature to use | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. 
This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | -| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | | history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | diff --git a/docs/components/openai_chat_model.md b/docs/components/openai_chat_model.md index e41c6692..62120978 100644 --- a/docs/components/openai_chat_model.md +++ b/docs/components/openai_chat_model.md @@ -27,7 +27,7 @@ component_config: | base_url | False | None | Base URL for OpenAI API | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | @@ -42,7 +42,8 @@ component_config: content: }, ... - ] + ], + stream: } ``` | Field | Required | Description | @@ -50,6 +51,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response. It is is not provided, it will default to the value of llm_mode. | ## Component Output Schema diff --git a/docs/components/openai_chat_model_with_history.md b/docs/components/openai_chat_model_with_history.md index 9c7c4dc3..ce306a5b 100644 --- a/docs/components/openai_chat_model_with_history.md +++ b/docs/components/openai_chat_model_with_history.md @@ -29,7 +29,7 @@ component_config: | base_url | False | None | Base URL for OpenAI API | | stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. | | stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. 
This is mutually exclusive with stream_to_flow. | -| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| llm_mode | False | none | The mode for streaming results: 'none' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | | stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | | set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | | history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | @@ -47,6 +47,7 @@ component_config: }, ... ], + stream: , clear_history_but_keep_depth: } ``` @@ -55,6 +56,7 @@ component_config: | messages | True | | | messages[].role | True | | | messages[].content | True | | +| stream | False | Whether to stream the response. It is is not provided, it will default to the value of llm_mode. | | clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | From 4eb4beffb8725424986b29b07fab04984ce11b28 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 10 Dec 2024 12:00:12 -0500 Subject: [PATCH 21/54] fix: resolve graceful termination issues --- .../components/component_base.py | 6 ++--- .../llm/openai/openai_chat_model_base.py | 4 ++-- .../components/inputs_outputs/timer_input.py | 3 ++- src/solace_ai_connector/main.py | 9 ++++---- .../solace_ai_connector.py | 23 ++++++++++++++++++- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index dc907588..ef62154b 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -89,14 +89,14 @@ def run(self): self.reset_sleep_time() except AssertionError as e: try: - time.sleep(self.event_message_repeat_sleep_time) + self.stop_signal.wait(timeout=self.event_message_repeat_sleep_time) except KeyboardInterrupt: self.handle_component_error(e, event) self.grow_sleep_time() self.handle_component_error(e, event) except Exception as e: try: - time.sleep(self.event_message_repeat_sleep_time) + self.stop_signal.wait(timeout=self.event_message_repeat_sleep_time) except KeyboardInterrupt: self.handle_component_error(e, event) self.grow_sleep_time() @@ -522,6 +522,6 @@ def run_micro_monitoring(self) -> None: monitoring.collect_metrics(metrics) # Wait for the next interval sleep_interval = monitoring.get_interval() - time.sleep(sleep_interval) + self.stop_signal.wait(timeout=sleep_interval) except KeyboardInterrupt: log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py index 012d1dfe..3df74580 100644 --- a/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py +++ b/src/solace_ai_connector/components/general/llm/openai/openai_chat_model_base.py @@ -185,7 +185,7 @@ def invoke(self, message, data): if max_retries <= 0: raise e else: - time.sleep(1) + self.stop_signal.wait(timeout=1) def invoke_stream(self, client, message, messages): response_uuid = str(uuid.uuid4()) @@ 
-239,7 +239,7 @@ def invoke_stream(self, client, message, messages): raise e else: # Small delay before retrying - time.sleep(1) + self.stop_signal.wait(timeout=1) if self.stream_to_next_component: # Just return the last chunk diff --git a/src/solace_ai_connector/components/inputs_outputs/timer_input.py b/src/solace_ai_connector/components/inputs_outputs/timer_input.py index 756d44f3..38a52e58 100644 --- a/src/solace_ai_connector/components/inputs_outputs/timer_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/timer_input.py @@ -36,6 +36,7 @@ class TimerInput(ComponentBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.interval_ms = self.get_config("interval_ms") @@ -61,7 +62,7 @@ def get_next_message(self): else: # Sleep for the remaining time sleep_time = (self.interval_ms - delta_time) / 1000 - time.sleep(sleep_time) + self.stop_signal.wait(timeout=sleep_time) self.last_message_time = self.get_current_time() return Message(payload={}) diff --git a/src/solace_ai_connector/main.py b/src/solace_ai_connector/main.py index e345244a..2cb96e75 100644 --- a/src/solace_ai_connector/main.py +++ b/src/solace_ai_connector/main.py @@ -110,7 +110,7 @@ def shutdown(): app.stop() app.cleanup() print("Solace AI Connector exited successfully!") - os._exit(0) + sys.exit(0) signal.signal(signal.SIGINT, lambda s, f: shutdown()) signal.signal(signal.SIGTERM, lambda s, f: shutdown()) @@ -118,11 +118,10 @@ def shutdown(): # Start the application try: app.run() - except KeyboardInterrupt: - shutdown() - - try: app.wait_for_flows() + except Exception as e: + print(f"Error running Solace AI Connector: {e}", file=sys.stderr) + shutdown() except KeyboardInterrupt: shutdown() diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index ad059bdb..1a83e2ec 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -109,15 +109,32 @@ def cleanup(self): """Clean up resources and ensure all threads are properly joined""" log.info("Cleaning up Solace AI Event Connector") for flow in self.flows: - flow.cleanup() + try: + flow.cleanup() + except Exception as e: + log.error(f"Error cleaning up flow: {e}") self.flows.clear() + + # Clean up queues + for queue_name, queue in self.flow_input_queues.items(): + try: + while not queue.empty(): + queue.get_nowait() + except Exception as e: + log.error(f"Error cleaning queue {queue_name}: {e}") + self.flow_input_queues.clear() + if hasattr(self, "trace_queue") and self.trace_queue: self.trace_queue.put(None) # Signal the trace thread to stop if self.trace_thread: self.trace_thread.join() if hasattr(self, "cache_check_thread"): self.cache_check_thread.join() + if hasattr(self, "error_queue"): + self.error_queue.put(None) + self.timer_manager.cleanup() + log.info("Cleanup completed") def setup_logging(self): """Setup logging""" @@ -230,10 +247,14 @@ def stop(self): """Stop the Solace AI Event Connector""" log.info("Stopping Solace AI Event Connector") self.stop_signal.set() + + # Stop core services first self.timer_manager.stop() # Stop the timer manager first self.cache_service.stop() # Stop the cache service + if self.trace_thread: self.trace_thread.join() + # Update monitoring last self.monitoring.set_liveness(False) self.monitoring.set_readiness(False) From 488de440c873c6f7baacfecdee93ad3df48dbad9 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Wed, 11 Dec 2024 08:33:03 -0500 Subject: [PATCH 22/54] fix: remove payloads from logs --- 
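The graceful-termination fixes in the previous patch replace blocking time.sleep calls with waits on the shared stop event, so a shutdown request interrupts any pause immediately instead of after the full interval. A minimal, self-contained sketch of that pattern with illustrative names:

```python
import threading
import time

stop_signal = threading.Event()

def worker():
    while not stop_signal.is_set():
        # ... one unit of work ...
        # Event.wait() returns as soon as the event is set, whereas
        # time.sleep() would always block for the full interval.
        stop_signal.wait(timeout=5)

t = threading.Thread(target=worker)
t.start()
time.sleep(0.2)
stop_signal.set()  # shutdown is observed within milliseconds, not after 5 s
t.join()
```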
src/solace_ai_connector/components/component_base.py | 4 +--- .../components/inputs_outputs/broker_input.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index ef62154b..2ebc42d8 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -136,9 +136,7 @@ def get_next_event(self): timeout = self.queue_timeout_ms or DEFAULT_QUEUE_TIMEOUT_MS event = self.input_queue.get(timeout=timeout / 1000) log.debug( - "%sComponent received event %s from input queue", - self.log_identifier, - event, + "%sComponent received event from input queue", self.log_identifier ) return event except queue.Empty: diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 38be39f4..670519d6 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -88,6 +88,7 @@ class BrokerInput(BrokerBase): + def __init__(self, module_info=None, **kwargs): module_info = module_info or info super().__init__(module_info, **kwargs) @@ -123,12 +124,7 @@ def get_next_message(self, timeout_ms=None): topic = broker_message.get("topic") user_properties = broker_message.get("user_properties", {}) - log.debug( - "Received message from broker: topic=%s, user_properties=%s, payload length=%d", - topic, - user_properties, - len(payload) if payload is not None else 0, - ) + log.debug("Received message from broker: topic=%s", topic) return Message(payload=payload, topic=topic, user_properties=user_properties) def acknowledge_message(self, broker_message): From e712b9b0d8a89c7efd32715cb8bf3286931de63f Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 16 Dec 2024 15:28:18 -0500 Subject: [PATCH 23/54] feat: add the forever retry --- examples/llm/litellm_chat.yaml | 2 + .../common/messaging/solace_messaging.py | 51 ++++++++++++++++--- .../components/inputs_outputs/broker_base.py | 5 ++ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/examples/llm/litellm_chat.yaml b/examples/llm/litellm_chat.yaml index 83ba283a..9ad2a259 100644 --- a/examples/llm/litellm_chat.yaml +++ b/examples/llm/litellm_chat.yaml @@ -44,6 +44,8 @@ shared_config: broker_username: ${SOLACE_BROKER_USERNAME} broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} + reconnection_strategy: forever_retry # options: forever_retry, parametrized_retry + retry_interval: 1000 # in milliseconds # Take from input broker and publish back to Solace flows: diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 875e677b..1eed469f 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -130,15 +130,50 @@ def connect(self): or os.path.dirname(certifi.where()) or "/usr/share/ca-certificates/mozilla/", } - # print (f"Broker Properties: {self.broker_properties}") - self.messaging_service = ( - MessagingService.builder() - .from_properties(broker_props) - .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + strategy = self.broker_properties.get("reconnection_strategy") + if strategy and strategy == "forever_retry": + retry_interval = self.broker_properties.get("retry_interval") + 
if not retry_interval: + log.warning("retry_interval not provided, using default value of 3000") + retry_interval = 3000 + self.messaging_service = ( + MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.forever_retry(retry_interval) + ) + .build() + ) + elif strategy and strategy == "parametrized_retry": + retry_count = self.broker_properties.get("retry_count") + retry_wait = self.broker_properties.get("retry_wait") + if not retry_count: + log.warning("retry_count not provided, using default value of 20") + retry_count = 20 + if not retry_wait: + log.warning("retry_wait not provided, using default value of 3000") + retry_wait = 3000 + self.messaging_service = ( + MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.parametrized_retry(retry_count, retry_wait) + ) + .build() + ) + else: + # default + log.info( + "Using default reconnection strategy. 20 retries with 3000ms interval" + ) + self.messaging_service = ( + MessagingService.builder() + .from_properties(broker_props) + .with_reconnection_retry_strategy( + RetryStrategy.parametrized_retry(20, 3000) + ) + .build() ) - .build() - ) # Blocking connect thread self.messaging_service.connect() diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index c312740b..7502fba1 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -30,6 +30,7 @@ class BrokerBase(ComponentBase): + def __init__(self, module_info, **kwargs): super().__init__(module_info, **kwargs) self.broker_properties = self.get_broker_properties() @@ -94,6 +95,10 @@ def get_broker_properties(self): "subscriptions": self.get_config("broker_subscriptions"), "trust_store_path": self.get_config("trust_store_path"), "temporary_queue": self.get_config("temporary_queue"), + "reconnection_strategy": self.get_config("reconnection_strategy"), + "retry_interval": self.get_config("retry_interval"), + "retry_count": self.get_config("retry_count"), + "retry_interval": self.get_config("retry_interval"), } return broker_properties From 5ff264a7703db6be1da743069d421dccc88eeadc Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 19 Dec 2024 11:13:05 -0500 Subject: [PATCH 24/54] feat: keep connecting --- .../components/inputs_outputs/broker_base.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 7502fba1..dfe2f985 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -8,6 +8,7 @@ from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload +from ...common.log import log # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. 
The changes @@ -44,20 +45,32 @@ def __init__(self, module_info, **kwargs): self.messages_to_ack = [] self.connected = False self.needs_acknowledgement = True + self.connection_repeat_sleep_time = 5 @abstractmethod def invoke(self, message, data): pass def connect(self): - if not self.connected: - self.messaging_service.connect() - self.connected = True + while not self.stop_signal.is_set(): + if not self.connected: + try: + self.messaging_service.connect() + self.connected = self.messaging_service.is_connected + except Exception as e: + log.error( + f"Error connecting to broker: {e}. \n Retrying in {self.connection_repeat_sleep_time} seconds." + ) + self.stop_signal.wait(timeout=self.connection_repeat_sleep_time) + self.grow_sleep_time() + else: + self.reset_sleep_time() + break def disconnect(self): if self.connected: self.messaging_service.disconnect() - self.connected = False + self.connected = self.messaging_service.is_connected def stop_component(self): self.disconnect() @@ -110,3 +123,12 @@ def start(self): def generate_uuid(self): return str(uuid.uuid4()) + + def grow_sleep_time(self): + if self.connection_repeat_sleep_time < 60: + self.connection_repeat_sleep_time *= 2 + if self.connection_repeat_sleep_time > 60: + self.connection_repeat_sleep_time = 60 + + def reset_sleep_time(self): + self.connection_repeat_sleep_time = 1 From f9fa6b974c2dce696cc3de35f10ace46a1d9e814 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 19 Dec 2024 13:23:59 -0500 Subject: [PATCH 25/54] Feat: add monitoring --- src/solace_ai_connector/common/monitoring.py | 19 ++++++++++++++++--- .../components/inputs_outputs/broker_base.py | 9 +++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 08a56f27..a00ea32d 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -1,4 +1,9 @@ from typing import Any, List +from enum import Enum + + +class Metrics(Enum): + SOLCLIENT_STATS_RX_ACKED = 0 class Monitoring: @@ -30,6 +35,14 @@ def __init__(self, config: dict[str, Any] = None) -> None: self._initialized = True self._collected_metrics = {} + def set_required_metrics(self, required_metrics: List[Metrics]) -> None: + """ + Set the required metrics for the MetricCollector. + + :param required_metrics: List of required metrics + """ + self._required_metrics = required_metrics + def set_readiness(self, ready: bool) -> None: """ Set the readiness status of the MetricCollector. @@ -62,14 +75,14 @@ def get_interval(self) -> int: """ return self._interval - def collect_metrics(self, metrics: dict[dict[str, Any]]) -> None: + def collect_metrics(self, metrics: dict[Metrics, dict[str, Any]]) -> None: """ Collect metrics. 
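The reconnect loop added above backs off by doubling its sleep time up to a 60-second ceiling before trying again. A tiny, simplified illustration of the resulting wait schedule, outside the connector:

```python
def backoff_schedule(start=5, cap=60, attempts=7):
    """Yield the wait (in seconds) before each successive reconnect attempt."""
    wait = start
    for _ in range(attempts):
        yield wait
        wait = min(wait * 2, cap)  # double, but never exceed the cap

print(list(backoff_schedule()))  # [5, 10, 20, 40, 60, 60, 60]
```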
- :param metrics: List of metrics + :param metrics: Dictionary of metrics """ for key, value in metrics.items(): - self._collected_metrics[key] = value + self._collected_metrics[key.value] = value def get_collected_metrics(self) -> List[dict[str, Any]]: """ diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 4bdfa55b..df547201 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -6,12 +6,13 @@ from abc import abstractmethod -from solace.messaging.utils.manageable import ApiMetrics, Metric +from solace.messaging.utils.manageable import ApiMetrics, Metric as SolaceMetrics from ..component_base import ComponentBase from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload from ...common.log import log +from ...common.monitoring import Metrics # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. The changes @@ -128,12 +129,12 @@ def generate_uuid(self): return str(uuid.uuid4()) def get_metrics(self): - required_metrics = ["SOLCLIENT_STATS_RX_ACKED"] + required_metrics = [Metrics.SOLCLIENT_STATS_RX_ACKED] stats_dict = {} metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() for metric_key in required_metrics: - metric = Metric(metric_key) - stats_dict[metric_key] = metrics.get_value(Metric(metric)) + metric = SolaceMetrics(metric_key) + stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) return stats_dict From a589776cb82518b7b8baefda68935fb87324d3c9 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 19 Dec 2024 13:38:16 -0500 Subject: [PATCH 26/54] feat: replace the reconnection --- .../common/messaging/solace_messaging.py | 9 +++++++ .../components/inputs_outputs/broker_base.py | 27 +++---------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 1eed469f..eac51c1c 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -142,6 +142,9 @@ def connect(self): .with_reconnection_retry_strategy( RetryStrategy.forever_retry(retry_interval) ) + .with_connection_retry_strategy( + RetryStrategy.forever_retry(retry_interval) + ) .build() ) elif strategy and strategy == "parametrized_retry": @@ -159,6 +162,9 @@ def connect(self): .with_reconnection_retry_strategy( RetryStrategy.parametrized_retry(retry_count, retry_wait) ) + .with_connection_retry_strategy( + RetryStrategy.parametrized_retry(retry_count, retry_wait) + ) .build() ) else: @@ -172,6 +178,9 @@ def connect(self): .with_reconnection_retry_strategy( RetryStrategy.parametrized_retry(20, 3000) ) + .with_connection_retry_strategy( + RetryStrategy.parametrized_retry(20, 3000) + ) .build() ) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index dfe2f985..c072ad0d 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -8,7 +8,6 @@ from ...common.message import Message from 
...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload -from ...common.log import log # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. The changes @@ -52,20 +51,9 @@ def invoke(self, message, data): pass def connect(self): - while not self.stop_signal.is_set(): - if not self.connected: - try: - self.messaging_service.connect() - self.connected = self.messaging_service.is_connected - except Exception as e: - log.error( - f"Error connecting to broker: {e}. \n Retrying in {self.connection_repeat_sleep_time} seconds." - ) - self.stop_signal.wait(timeout=self.connection_repeat_sleep_time) - self.grow_sleep_time() - else: - self.reset_sleep_time() - break + if not self.connected: + self.messaging_service.connect() + self.connected = self.messaging_service.is_connected def disconnect(self): if self.connected: @@ -123,12 +111,3 @@ def start(self): def generate_uuid(self): return str(uuid.uuid4()) - - def grow_sleep_time(self): - if self.connection_repeat_sleep_time < 60: - self.connection_repeat_sleep_time *= 2 - if self.connection_repeat_sleep_time > 60: - self.connection_repeat_sleep_time = 60 - - def reset_sleep_time(self): - self.connection_repeat_sleep_time = 1 From aa535d2de0edd0c4d61945e6c27b4f0ae0be6a14 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 20 Dec 2024 15:45:08 -0500 Subject: [PATCH 27/54] feat: refactor monitoring --- src/solace_ai_connector/common/monitoring.py | 60 +++++++++++++++++-- .../components/component_base.py | 45 +++++++------- .../components/inputs_outputs/broker_base.py | 4 +- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index a00ea32d..bd0c5ac1 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -1,9 +1,10 @@ from typing import Any, List from enum import Enum +from threading import Lock class Metrics(Enum): - SOLCLIENT_STATS_RX_ACKED = 0 + SOLCLIENT_STATS_RX_ACKED = "SOLCLIENT_STATS_RX_ACKED" class Monitoring: @@ -15,7 +16,7 @@ class Monitoring: _initialized = False _ready = False _live = False - _interval = 60 + _interval = 3 def __new__(cls, *args, **kwargs): if not cls._instance: @@ -34,6 +35,22 @@ def __init__(self, config: dict[str, Any] = None) -> None: self._initialized = True self._collected_metrics = {} + self._lock = Lock() + self._initialize_metrics() + + def _initialize_metrics(self) -> None: + """ + Initialize the MetricCollector. + """ + self._required_metrics = [metric.value for metric in Metrics] + + def get_required_metrics(self) -> List[Metrics]: + """ + Get the required metrics for the MetricCollector. + + :return: List of required metrics + """ + return self._required_metrics def set_required_metrics(self, required_metrics: List[Metrics]) -> None: """ @@ -81,13 +98,46 @@ def collect_metrics(self, metrics: dict[Metrics, dict[str, Any]]) -> None: :param metrics: Dictionary of metrics """ - for key, value in metrics.items(): - self._collected_metrics[key.value] = value + with self._lock: + for key, value in metrics.items(): + self._collected_metrics[key] = value - def get_collected_metrics(self) -> List[dict[str, Any]]: + def get_detailed_metrics(self) -> List[dict[str, Any]]: """ Retrieve collected metrics. 
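Because every component thread reports into the same monitoring singleton, the collection step is serialized with a lock. A minimal sketch of that pattern with an illustrative toy class, not the connector's API:

```python
import threading

class MetricStore:
    """Toy stand-in for a shared metric sink written to by many threads."""

    def __init__(self):
        self._lock = threading.Lock()
        self._data = {}

    def collect(self, metrics: dict) -> None:
        # Serialize writers so concurrent updates cannot interleave.
        with self._lock:
            self._data.update(metrics)

store = MetricStore()
threads = [
    threading.Thread(target=store.collect, args=({f"metric_{i}": i},))
    for i in range(4)
]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(store._data)  # all four updates applied, none lost
```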
:return: Dictionary of collected metrics """ return self._collected_metrics + + def get_aggregated_metrics(self) -> List[dict[str, Any]]: + """ + Retrieve collected metrics. + + :return: Dictionary of collected metrics + """ + aggregated_metrics = {} + for key, value in self._collected_metrics.items(): + new_key = tuple( + item for item in key if item[0] not in ["flow_index", "component_index"] + ) + + if new_key not in aggregated_metrics: + aggregated_metrics[new_key] = value + else: + # aggregate metrics: sum + metric = key.metric + aggregated_timestamp = aggregated_metrics[new_key].timestamp + metric_value = value.value + metric_timestamp = value.timestamp + + if metric in [ + Metrics.SOLCLIENT_STATS_RX_ACKED + ]: # add metrics that need to be aggregated by sum + aggregated_metrics[new_key].value += sum(metric_value) + + # set timestamp to the latest + if metric_timestamp > aggregated_timestamp: + aggregated_metrics[new_key].timestamp = metric_timestamp + + return aggregated_metrics diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index d456dc96..b9ceb3a7 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -63,6 +63,8 @@ def __init__(self, module_info, **kwargs): self.setup_communications() self.setup_broker_request_response() + self.monitoring = Monitoring() + def grow_sleep_time(self): if self.event_message_repeat_sleep_time < 60: self.event_message_repeat_sleep_time *= 2 @@ -329,7 +331,11 @@ def setup_broker_request_response(self): "request_expiry_ms": request_expiry_ms, } - for key in ["response_topic_prefix", "response_queue_prefix", "response_topic_insertion_expression"]: + for key in [ + "response_topic_prefix", + "response_queue_prefix", + "response_topic_insertion_expression", + ]: if key in self.broker_request_response_config: rrc_config[key] = self.broker_request_response_config[key] @@ -475,29 +481,25 @@ def do_broker_request_response( def get_metrics_with_header(self) -> dict[dict[str, Any], Any]: metrics = {} + required_metrics = self.monitoring.get_required_metrics() pure_metrics = self.get_metrics() for metric, value in pure_metrics.items(): - key = tuple( - [ - ("flow", self.flow_name), - ("flow_index", self.index), - ("component", self.name), - ("component_index", self.component_index), - ("metric", metric), - ] - ) + # filter metrics + if metric in required_metrics: + key = tuple( + [ + ("flow", self.flow_name), + ("flow_index", self.index), + ("component", self.name), + ("component_index", self.component_index), + ("metric", metric), + ] + ) - value = {"value": value, "timestamp": int(time.time())} - log.debug( - "Metrics - flow: %s, component: %s, metric: %s, value: %s", - self.flow_name, - self.name, - metric, - value, - ) + value = {"value": value, "timestamp": int(time.time())} - metrics[key] = value + metrics[key] = value return metrics def get_metrics(self) -> dict[str, Any]: @@ -507,14 +509,13 @@ def run_micro_monitoring(self) -> None: """ Start the metric collection process in a loop. 
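Aggregation collapses per-instance readings into one series per flow, component, and metric by dropping the index dimensions from the key. A simplified, self-contained sketch of that roll-up, summing values and keeping the newest timestamp (key layout mirrors the tuple-of-pairs headers built above; values are illustrative):

```python
import time

now = int(time.time())
collected = {
    (("flow", "rag"), ("flow_index", 0), ("component", "broker_input"),
     ("component_index", 0), ("metric", "SOLCLIENT_STATS_RX_ACKED")): {"value": 3, "timestamp": now},
    (("flow", "rag"), ("flow_index", 1), ("component", "broker_input"),
     ("component_index", 0), ("metric", "SOLCLIENT_STATS_RX_ACKED")): {"value": 5, "timestamp": now + 2},
}

aggregated = {}
for key, reading in collected.items():
    # Strip the per-instance dimensions so replicas roll up together.
    agg_key = tuple(item for item in key if item[0] not in ("flow_index", "component_index"))
    if agg_key not in aggregated:
        aggregated[agg_key] = dict(reading)
    else:
        aggregated[agg_key]["value"] += reading["value"]  # sum-style metric
        aggregated[agg_key]["timestamp"] = max(
            aggregated[agg_key]["timestamp"], reading["timestamp"]
        )

for key, reading in aggregated.items():
    print(dict(key), reading)  # one entry: value 8, newest timestamp
```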
""" - monitoring = Monitoring() try: while not self.stop_signal.is_set(): # Collect metrics metrics = self.get_metrics_with_header() - monitoring.collect_metrics(metrics) + self.monitoring.collect_metrics(metrics) # Wait for the next interval - sleep_interval = monitoring.get_interval() + sleep_interval = self.monitoring.get_interval() self.stop_signal.wait(timeout=sleep_interval) except KeyboardInterrupt: log.info("Monitoring stopped.") diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 5a4464a8..65caebc8 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -122,7 +122,7 @@ def get_metrics(self): stats_dict = {} metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() for metric_key in required_metrics: - metric = SolaceMetrics(metric_key) - stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) + metric = SolaceMetrics(metric_key.value) + stats_dict[metric_key.value] = metrics.get_value(SolaceMetrics(metric)) return stats_dict From bc8251c3504e482069706839773bc10eabb50732 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 20 Dec 2024 17:03:13 -0500 Subject: [PATCH 28/54] feat: add connection metric --- .../common/messaging/solace_messaging.py | 2 +- src/solace_ai_connector/common/monitoring.py | 14 +++++++++++++- .../components/inputs_outputs/broker_base.py | 18 ++---------------- .../components/inputs_outputs/broker_input.py | 19 +++++++++++++++++++ .../inputs_outputs/broker_output.py | 4 ++++ 5 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index eac51c1c..a8576a76 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -266,7 +266,7 @@ def disconnect(self): log.debug("Error disconnecting: %s", exception) def is_connected(self): - return self.messaging_service.is_connected() + return self.messaging_service.is_connected def send_message( self, diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index bd0c5ac1..8d11bb69 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -5,6 +5,10 @@ class Metrics(Enum): SOLCLIENT_STATS_RX_ACKED = "SOLCLIENT_STATS_RX_ACKED" + SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS = ( + "SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS" + ) + IS_CONNECTED = "IS_CONNECTED" class Monitoring: @@ -132,10 +136,18 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: metric_timestamp = value.timestamp if metric in [ - Metrics.SOLCLIENT_STATS_RX_ACKED + Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, ]: # add metrics that need to be aggregated by sum aggregated_metrics[new_key].value += sum(metric_value) + if metric in [ + metric.IS_CONNECTED + ]: # add metrics that need to be aggregated by max + aggregated_metrics[new_key].value = ( + aggregated_metrics[new_key].value or metric_value + ) + # set timestamp to the latest if metric_timestamp > aggregated_timestamp: aggregated_metrics[new_key].timestamp = metric_timestamp diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 
65caebc8..76531510 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -1,18 +1,14 @@ """Base class for broker input/output components for the Solace AI Event Connector""" import uuid -import json from typing import List from abc import abstractmethod -from solace.messaging.utils.manageable import ApiMetrics, Metric as SolaceMetrics from ..component_base import ComponentBase from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload -from ...common.log import log -from ...common.monitoring import Metrics # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. The changes @@ -58,12 +54,12 @@ def invoke(self, message, data): def connect(self): if not self.connected: self.messaging_service.connect() - self.connected = self.messaging_service.is_connected + self.connected = self.messaging_service.is_connected() def disconnect(self): if self.connected: self.messaging_service.disconnect() - self.connected = self.messaging_service.is_connected + self.connected = self.messaging_service.is_connected() def stop_component(self): self.disconnect() @@ -116,13 +112,3 @@ def start(self): def generate_uuid(self): return str(uuid.uuid4()) - - def get_metrics(self): - required_metrics = [Metrics.SOLCLIENT_STATS_RX_ACKED] - stats_dict = {} - metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() - for metric_key in required_metrics: - metric = SolaceMetrics(metric_key.value) - stats_dict[metric_key.value] = metrics.get_value(SolaceMetrics(metric)) - - return stats_dict diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 670519d6..1f5d972c 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -1,8 +1,12 @@ """Input broker component for the Solace AI Event Connector""" +from solace.messaging.utils.manageable import ApiMetrics, Metric as SolaceMetrics + from ...common.log import log from .broker_base import BrokerBase from ...common.message import Message +from ...common.monitoring import Metrics + info = { "class_name": "BrokerInput", @@ -133,3 +137,18 @@ def acknowledge_message(self, broker_message): def get_acknowledgement_callback(self): current_broker_message = self.current_broker_message return lambda: self.acknowledge_message(current_broker_message) + + def get_metrics(self): + required_metrics = [ + Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, + ] + stats_dict = {} + metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() + for metric_key in required_metrics: + metric = SolaceMetrics(metric_key.value) + stats_dict[metric_key.value] = metrics.get_value(SolaceMetrics(metric)) + + stats_dict[Metrics.IS_CONNECTED.value] = self.messaging_service.is_connected() + + return stats_dict diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_output.py b/src/solace_ai_connector/components/inputs_outputs/broker_output.py index 25809b80..3188c422 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_output.py @@ -93,6 +93,7 @@ 
class BrokerOutput(BrokerBase): + def __init__(self, module_info=None, **kwargs): module_info = module_info or info super().__init__(module_info, **kwargs) @@ -152,3 +153,6 @@ def handle_message_ack_from_broker(self, context): message.call_acknowledgements() else: log.error("No message found in context for acknowledgement") + + def get_metrics(self): + return {} From 012d5443af8565d0ecd1dad6d6c0ee6b2da89a7b Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 2 Jan 2025 13:55:30 -0500 Subject: [PATCH 29/54] convert connection to async --- examples/llm/litellm_chat.yaml | 8 ++++---- .../common/messaging/solace_messaging.py | 9 +++++++-- src/solace_ai_connector/common/monitoring.py | 3 ++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/examples/llm/litellm_chat.yaml b/examples/llm/litellm_chat.yaml index 84f209db..428a55aa 100644 --- a/examples/llm/litellm_chat.yaml +++ b/examples/llm/litellm_chat.yaml @@ -40,9 +40,9 @@ log: logback: rollingpolicy: file-name-pattern: "${LOG_FILE}.%d{yyyy-MM-dd}.%i.gz" - max-file-size: 600B - max-history: 2 - total-size-cap: 21KB #1GB + max-file-size: 100MB + max-history: 5 + total-size-cap: 1GB shared_config: - broker_config: &broker_connection @@ -52,7 +52,7 @@ shared_config: broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} reconnection_strategy: forever_retry # options: forever_retry, parametrized_retry - retry_interval: 1000 # in milliseconds + retry_interval: 10000 # in milliseconds # Take from input broker and publish back to Solace flows: diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index a8576a76..d3c9c4ab 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -134,7 +134,9 @@ def connect(self): if strategy and strategy == "forever_retry": retry_interval = self.broker_properties.get("retry_interval") if not retry_interval: - log.warning("retry_interval not provided, using default value of 3000") + log.warning( + "retry_interval not provided, using default value of 3000 milliseconds" + ) retry_interval = 3000 self.messaging_service = ( MessagingService.builder() @@ -185,7 +187,10 @@ def connect(self): ) # Blocking connect thread - self.messaging_service.connect() + result = self.messaging_service.connect_async() + if result.result() is None: + log.error("Failed to connect to broker") + return False # Event Handling for the messaging service self.service_handler = ServiceEventHandler() diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 8d11bb69..7a194ffa 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -20,7 +20,7 @@ class Monitoring: _initialized = False _ready = False _live = False - _interval = 3 + _interval = 10 def __new__(cls, *args, **kwargs): if not cls._instance: @@ -122,6 +122,7 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: """ aggregated_metrics = {} for key, value in self._collected_metrics.items(): + # remove flow_index and component_index from key new_key = tuple( item for item in key if item[0] not in ["flow_index", "component_index"] ) From 776cc71ff79e436d77abba7e9f6309070785cdaa Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 2 Jan 2025 16:59:11 -0500 Subject: [PATCH 30/54] get metrics enum --- src/solace_ai_connector/common/monitoring.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 7a194ffa..2186efab 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -62,7 +62,7 @@ def set_required_metrics(self, required_metrics: List[Metrics]) -> None: :param required_metrics: List of required metrics """ - self._required_metrics = required_metrics + self._required_metrics = [metric.value for metric in required_metrics] def set_readiness(self, ready: bool) -> None: """ From e1b5c464e7afe2fa9d0a944aa717e10cf272e5a7 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 3 Jan 2025 12:39:11 -0500 Subject: [PATCH 31/54] add types of metrics --- src/solace_ai_connector/common/monitoring.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 2186efab..ebd7d6fd 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -10,6 +10,23 @@ class Metrics(Enum): ) IS_CONNECTED = "IS_CONNECTED" + @staticmethod + def get_type(metric: "Metrics") -> str: + """ + Get the type of the metric. + + :param metric: Metric + :return: Type of the metric + """ + if metric in [ + Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, + Metrics.IS_CONNECTED, + ]: + return "integer" + # Add more cases here if needed + return "unknown" + class Monitoring: """ @@ -153,4 +170,7 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: if metric_timestamp > aggregated_timestamp: aggregated_metrics[new_key].timestamp = metric_timestamp + # set type + aggregated_metrics[new_key].type = Metrics.get_type(metric) + return aggregated_metrics From d768771b80bf94b5d55b810466642ed8db006212 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 3 Jan 2025 14:39:31 -0500 Subject: [PATCH 32/54] use metrics rather than metric values --- src/solace_ai_connector/common/monitoring.py | 19 ++++++++++++------- .../components/component_base.py | 5 +++-- .../components/inputs_outputs/broker_input.py | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index ebd7d6fd..f096f634 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -63,7 +63,7 @@ def _initialize_metrics(self) -> None: """ Initialize the MetricCollector. """ - self._required_metrics = [metric.value for metric in Metrics] + self._required_metrics = [metric for metric in Metrics] def get_required_metrics(self) -> List[Metrics]: """ @@ -113,7 +113,7 @@ def get_interval(self) -> int: """ return self._interval - def collect_metrics(self, metrics: dict[Metrics, dict[str, Any]]) -> None: + def collect_metrics(self, metrics: dict[Metrics, dict[Metrics, Any]]) -> None: """ Collect metrics. @@ -131,7 +131,9 @@ def get_detailed_metrics(self) -> List[dict[str, Any]]: """ return self._collected_metrics - def get_aggregated_metrics(self) -> List[dict[str, Any]]: + def get_aggregated_metrics( + self, required_metrics: List[Metrics] = None + ) -> List[dict[str, Any]]: """ Retrieve collected metrics. 
@@ -139,6 +141,13 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: """ aggregated_metrics = {} for key, value in self._collected_metrics.items(): + # get metric + metric = next(item[1] for item in key if item[0] == "metric") + + # skip metrics that are not required + if required_metrics and metric not in required_metrics: + continue + # remove flow_index and component_index from key new_key = tuple( item for item in key if item[0] not in ["flow_index", "component_index"] @@ -148,7 +157,6 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: aggregated_metrics[new_key] = value else: # aggregate metrics: sum - metric = key.metric aggregated_timestamp = aggregated_metrics[new_key].timestamp metric_value = value.value metric_timestamp = value.timestamp @@ -170,7 +178,4 @@ def get_aggregated_metrics(self) -> List[dict[str, Any]]: if metric_timestamp > aggregated_timestamp: aggregated_metrics[new_key].timestamp = metric_timestamp - # set type - aggregated_metrics[new_key].type = Metrics.get_type(metric) - return aggregated_metrics diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index b9ceb3a7..79c5ad12 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -14,6 +14,7 @@ from ..common.event import Event, EventType from ..flow.request_response_flow_controller import RequestResponseFlowController from ..common.monitoring import Monitoring +from ..common.monitoring import Metrics DEFAULT_QUEUE_TIMEOUT_MS = 1000 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -479,7 +480,7 @@ def do_broker_request_response( f"Broker request response controller not found for component {self.name}" ) - def get_metrics_with_header(self) -> dict[dict[str, Any], Any]: + def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: metrics = {} required_metrics = self.monitoring.get_required_metrics() @@ -502,7 +503,7 @@ def get_metrics_with_header(self) -> dict[dict[str, Any], Any]: metrics[key] = value return metrics - def get_metrics(self) -> dict[str, Any]: + def get_metrics(self) -> dict[Metrics, Any]: return {} def run_micro_monitoring(self) -> None: diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 1f5d972c..3ad4b7c8 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -147,8 +147,8 @@ def get_metrics(self): metrics: "ApiMetrics" = self.messaging_service.messaging_service.metrics() for metric_key in required_metrics: metric = SolaceMetrics(metric_key.value) - stats_dict[metric_key.value] = metrics.get_value(SolaceMetrics(metric)) + stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) - stats_dict[Metrics.IS_CONNECTED.value] = self.messaging_service.is_connected() + stats_dict[Metrics.IS_CONNECTED] = self.messaging_service.is_connected() return stats_dict From 3ba10421f18c6a604fa87e2adfbb72f62b579590 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 3 Jan 2025 14:50:23 -0500 Subject: [PATCH 33/54] fix bug --- src/solace_ai_connector/common/monitoring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index f096f634..04245191 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -79,7 
+79,7 @@ def set_required_metrics(self, required_metrics: List[Metrics]) -> None: :param required_metrics: List of required metrics """ - self._required_metrics = [metric.value for metric in required_metrics] + self._required_metrics = [metric for metric in required_metrics] def set_readiness(self, ready: bool) -> None: """ From d31b032f64e72d4e5b2fe0de32fd2154bccc3005 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 3 Jan 2025 15:05:47 -0500 Subject: [PATCH 34/54] update type --- src/solace_ai_connector/common/monitoring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 04245191..f1368cb3 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -133,7 +133,7 @@ def get_detailed_metrics(self) -> List[dict[str, Any]]: def get_aggregated_metrics( self, required_metrics: List[Metrics] = None - ) -> List[dict[str, Any]]: + ) -> dict[tuple, dict[str, Any]]: """ Retrieve collected metrics. From 0c5bf8135546adea3b87a04b2b2a40a656269b05 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Sun, 5 Jan 2025 22:18:59 -0500 Subject: [PATCH 35/54] convert monitoring output to dictionary --- src/solace_ai_connector/common/monitoring.py | 18 ++++++++++++++++-- .../components/inputs_outputs/broker_input.py | 4 +++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index f1368cb3..7e0c9523 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -133,7 +133,7 @@ def get_detailed_metrics(self) -> List[dict[str, Any]]: def get_aggregated_metrics( self, required_metrics: List[Metrics] = None - ) -> dict[tuple, dict[str, Any]]: + ) -> List[dict[str, Any]]: """ Retrieve collected metrics. 
@@ -178,4 +178,18 @@ def get_aggregated_metrics( if metric_timestamp > aggregated_timestamp: aggregated_metrics[new_key].timestamp = metric_timestamp - return aggregated_metrics + # convert to dictionary + formatted_metrics = [] + for key, value in aggregated_metrics.items(): + metric_dict = dict(key) + formatted_metrics.append( + { + "flow": metric_dict.get("flow"), + "component": metric_dict.get("component"), + "metric": metric_dict.get("metric").value, + "timestamp": value.timestamp, + "value": value.value, + } + ) + + return formatted_metrics diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 3ad4b7c8..fb6c4d6f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -149,6 +149,8 @@ def get_metrics(self): metric = SolaceMetrics(metric_key.value) stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) - stats_dict[Metrics.IS_CONNECTED] = self.messaging_service.is_connected() + stats_dict[Metrics.IS_CONNECTED] = ( + 1 if self.messaging_service.is_connected() else 0 + ) return stats_dict From 0d6421bf759e562e3dc118ad4c0a1abd10272261 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Sun, 5 Jan 2025 22:34:40 -0500 Subject: [PATCH 36/54] fix bug --- src/solace_ai_connector/common/monitoring.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 7e0c9523..f7feb955 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -186,9 +186,9 @@ def get_aggregated_metrics( { "flow": metric_dict.get("flow"), "component": metric_dict.get("component"), - "metric": metric_dict.get("metric").value, - "timestamp": value.timestamp, - "value": value.value, + "metric": metric_dict.get("metric"), + "timestamp": value["timestamp"], + "value": value["value"], } ) From 7a726ba6d65a70f42fd4baf58470426de5abbb6c Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 9 Jan 2025 14:04:30 -0500 Subject: [PATCH 37/54] feat: add connection status --- .../common/messaging/solace_messaging.py | 19 +++++- src/solace_ai_connector/common/monitoring.py | 58 +++++++++++-------- .../components/component_base.py | 32 ++++++++++ .../components/inputs_outputs/broker_input.py | 7 +-- .../solace_ai_connector.py | 6 -- 5 files changed, 87 insertions(+), 35 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index d3c9c4ab..f796c6ba 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -3,6 +3,7 @@ import logging import os import certifi +import threading from solace.messaging.messaging_service import ( MessagingService, @@ -34,6 +35,16 @@ from ..log import log +_is_connected = False +_lock = threading.Lock() + + +def change_connection_status(status: bool): + global _is_connected + with _lock: + _is_connected = status + + class MessageHandlerImpl(MessageHandler): def __init__(self, persistent_receiver: PersistentMessageReceiver): @@ -71,14 +82,17 @@ class ServiceEventHandler( ): def on_reconnected(self, service_event: ServiceEvent): + change_connection_status(True) log.debug("Reconnected to broker: %s", service_event.get_cause()) log.debug("Message: %s", service_event.get_message()) 
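# Illustration only (not part of the patch above): because _is_connected and _lock are
# module-level, every SolaceMessaging instance in the process appears to share one
# connection status. A minimal standalone sketch of a similar lock-guarded flag,
# assuming nothing about the connector's API:
#
#   import threading
#
#   _status_lock = threading.Lock()
#   _status = False
#
#   def set_status(value: bool) -> None:
#       global _status
#       with _status_lock:   # writers serialize so readers never see a torn update
#           _status = value
#
#   def get_status() -> bool:
#       with _status_lock:
#           return _status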
def on_reconnecting(self, event: "ServiceEvent"): + change_connection_status(False) log.debug("Reconnecting - Error cause: %s", event.get_cause()) log.debug("Message: %s", event.get_message()) def on_service_interrupted(self, event: "ServiceEvent"): + change_connection_status(False) log.debug("Service interrupted - Error cause: %s", event.get_cause()) log.debug("Message: %s", event.get_message()) @@ -109,6 +123,7 @@ def __init__(self, broker_properties: dict): # set_python_solace_log_level("DEBUG") def __del__(self): + change_connection_status(False) self.disconnect() def connect(self): @@ -191,6 +206,7 @@ def connect(self): if result.result() is None: log.error("Failed to connect to broker") return False + change_connection_status(True) # Event Handling for the messaging service self.service_handler = ServiceEventHandler() @@ -267,11 +283,12 @@ def bind_to_queue( def disconnect(self): try: self.messaging_service.disconnect() + change_connection_status(False) except Exception as exception: # pylint: disable=broad-except log.debug("Error disconnecting: %s", exception) def is_connected(self): - return self.messaging_service.is_connected + return _is_connected def send_message( self, diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index f7feb955..e3ef75d8 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -8,7 +8,6 @@ class Metrics(Enum): SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS = ( "SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS" ) - IS_CONNECTED = "IS_CONNECTED" @staticmethod def get_type(metric: "Metrics") -> str: @@ -21,7 +20,6 @@ def get_type(metric: "Metrics") -> str: if metric in [ Metrics.SOLCLIENT_STATS_RX_ACKED, Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, - Metrics.IS_CONNECTED, ]: return "integer" # Add more cases here if needed @@ -56,6 +54,7 @@ def __init__(self, config: dict[str, Any] = None) -> None: self._initialized = True self._collected_metrics = {} + self._connection_status = {} self._lock = Lock() self._initialize_metrics() @@ -81,21 +80,13 @@ def set_required_metrics(self, required_metrics: List[Metrics]) -> None: """ self._required_metrics = [metric for metric in required_metrics] - def set_readiness(self, ready: bool) -> None: + def is_connected(self) -> int: """ - Set the readiness status of the MetricCollector. + Get the connection status of the broker. - :param ready: Readiness status + :return: Connection status """ - self._ready = ready - - def set_liveness(self, live: bool) -> None: - """ - Set the liveness status of the MetricCollector. - - :param live: Liveness status - """ - self._live = live + return 1 if self._live and self._ready else 0 def set_interval(self, interval: int) -> None: """ @@ -113,6 +104,30 @@ def get_interval(self) -> int: """ return self._interval + def set_connection_status(self, key, value: int) -> None: + """ + Set the connection status of the broker. + + :param key: Key + """ + self._connection_status[key] = value + + def get_connection_status(self) -> int: + """ + Get the connection status of the broker. + """ + status = 1 + for _, value in self._connection_status.items(): + # if a module is disconnected, the status is disconnected + if value == 0: + status = 0 + break + # if a module is connecting, the status is connecting + if status == 1 and value == 2: + status = 2 + + return value + def collect_metrics(self, metrics: dict[Metrics, dict[Metrics, Any]]) -> None: """ Collect metrics. 
@@ -148,9 +163,12 @@ def get_aggregated_metrics( if required_metrics and metric not in required_metrics: continue - # remove flow_index and component_index from key + # filter flow, flow_index, component, component_index from key new_key = tuple( - item for item in key if item[0] not in ["flow_index", "component_index"] + item + for item in key + if item[0] + not in ["flow", "flow_index", "component_module", "component_index"] ) if new_key not in aggregated_metrics: @@ -167,13 +185,6 @@ def get_aggregated_metrics( ]: # add metrics that need to be aggregated by sum aggregated_metrics[new_key].value += sum(metric_value) - if metric in [ - metric.IS_CONNECTED - ]: # add metrics that need to be aggregated by max - aggregated_metrics[new_key].value = ( - aggregated_metrics[new_key].value or metric_value - ) - # set timestamp to the latest if metric_timestamp > aggregated_timestamp: aggregated_metrics[new_key].timestamp = metric_timestamp @@ -184,7 +195,6 @@ def get_aggregated_metrics( metric_dict = dict(key) formatted_metrics.append( { - "flow": metric_dict.get("flow"), "component": metric_dict.get("component"), "metric": metric_dict.get("metric"), "timestamp": value["timestamp"], diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 79c5ad12..76119282 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -81,7 +81,9 @@ def create_thread_and_run(self): def run(self): # Start the micro monitoring thread monitoring_thread = threading.Thread(target=self.run_micro_monitoring) + connection_status_thread = threading.Thread(target=self.get_connection_status) monitoring_thread.start() + connection_status_thread.start() # Process events until the stop signal is set while not self.stop_signal.is_set(): event = None @@ -107,6 +109,7 @@ def run(self): self.stop_component() monitoring_thread.join() + connection_status_thread.join() def process_event_with_tracing(self, event): if self.trace_queue: @@ -493,6 +496,7 @@ def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: ("flow", self.flow_name), ("flow_index", self.index), ("component", self.name), + ("component_module", self.config.get("component_module")), ("component_index", self.component_index), ("metric", metric), ] @@ -506,6 +510,34 @@ def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: def get_metrics(self) -> dict[Metrics, Any]: return {} + def is_connected(self) -> int: + pass + + def get_connection_status(self) -> None: + """ + Get connection status + """ + try: + if self.config.get("component_module") == "broker_input": + while not self.stop_signal.is_set(): + key = tuple( + [ + ("flow", self.flow_name), + ("flow_index", self.index), + ("component", self.name), + ("component_index", self.component_index), + ] + ) + value = {"value": self.is_connected()} + + print(key, value) + + self.monitoring.set_connection_status(key, value) + # Wait 1 second for the next interval + self.stop_signal.wait(timeout=1) + except KeyboardInterrupt: + log.info("Monitoring connection status stopped.") + def run_micro_monitoring(self) -> None: """ Start the metric collection process in a loop. 
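For context, a standalone sketch (not the connector's actual API) of the aggregation shape these changes work toward: each component reports metrics under a header tuple such as (flow, flow_index, component, component_index, metric), and get_aggregated_metrics drops the per-instance parts of the key and sums the values. The names below and the {"value", "timestamp"} entry layout are assumptions for illustration only.

    from time import time

    # Two instances of the same component reporting the same metric
    collected = {
        (("flow", "f1"), ("flow_index", 0), ("component", "broker_input"),
         ("component_index", 0), ("metric", "SOLCLIENT_STATS_RX_ACKED")): {"value": 3, "timestamp": time()},
        (("flow", "f1"), ("flow_index", 1), ("component", "broker_input"),
         ("component_index", 0), ("metric", "SOLCLIENT_STATS_RX_ACKED")): {"value": 4, "timestamp": time()},
    }

    aggregated = {}
    for key, entry in collected.items():
        # drop the per-instance parts of the key, keep component and metric
        slim_key = tuple(
            item for item in key if item[0] not in ("flow", "flow_index", "component_index")
        )
        if slim_key not in aggregated:
            aggregated[slim_key] = dict(entry)
        else:
            aggregated[slim_key]["value"] += entry["value"]  # sum counter-style metrics
            aggregated[slim_key]["timestamp"] = max(
                aggregated[slim_key]["timestamp"], entry["timestamp"]
            )  # keep the latest timestamp

    print(aggregated)  # one entry keyed by (component, metric) with value 7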
diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index fb6c4d6f..c0923f4f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -138,6 +138,9 @@ def get_acknowledgement_callback(self): current_broker_message = self.current_broker_message return lambda: self.acknowledge_message(current_broker_message) + def is_connected(self): + return 1 if self.messaging_service.is_connected() else 0 + def get_metrics(self): required_metrics = [ Metrics.SOLCLIENT_STATS_RX_ACKED, @@ -149,8 +152,4 @@ def get_metrics(self): metric = SolaceMetrics(metric_key.value) stats_dict[metric_key] = metrics.get_value(SolaceMetrics(metric)) - stats_dict[Metrics.IS_CONNECTED] = ( - 1 if self.messaging_service.is_connected() else 0 - ) - return stats_dict diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 7db2b0c7..23155ce8 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -47,8 +47,6 @@ def run(self): if on_flow_creation: on_flow_creation(self.flows) - self.monitoring.set_readiness(True) - log.info("Solace AI Event Connector started successfully") except Exception as e: log.error("Error during Solace AI Event Connector startup: %s", str(e)) @@ -256,7 +254,3 @@ def stop(self): if self.trace_thread: self.trace_thread.join() - - # Update monitoring last - self.monitoring.set_liveness(False) - self.monitoring.set_readiness(False) From 5f56c79772dc3a3df1ff1f1c74616c87f6ea9f17 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Thu, 9 Jan 2025 17:52:51 -0500 Subject: [PATCH 38/54] feat: add reconnecting status --- .../common/messaging/solace_messaging.py | 36 ++++++++++++------- src/solace_ai_connector/common/monitoring.py | 25 ++++++------- .../components/component_base.py | 11 +++--- .../components/inputs_outputs/broker_base.py | 11 +++--- .../components/inputs_outputs/broker_input.py | 4 +-- 5 files changed, 49 insertions(+), 38 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index f796c6ba..91204417 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -4,6 +4,8 @@ import os import certifi import threading +import time +from enum import Enum from solace.messaging.messaging_service import ( MessagingService, @@ -35,14 +37,20 @@ from ..log import log -_is_connected = False +class ConnectionStatus(Enum): + RECONNECTING = 2 + CONNECTED = 1 + DISCONNECTED = 0 + + +_connection_status = ConnectionStatus.DISCONNECTED _lock = threading.Lock() -def change_connection_status(status: bool): - global _is_connected +def change_connection_status(status: ConnectionStatus): + global _connection_status with _lock: - _is_connected = status + _connection_status = status class MessageHandlerImpl(MessageHandler): @@ -82,17 +90,17 @@ class ServiceEventHandler( ): def on_reconnected(self, service_event: ServiceEvent): - change_connection_status(True) + change_connection_status(ConnectionStatus.CONNECTED) log.debug("Reconnected to broker: %s", service_event.get_cause()) log.debug("Message: %s", service_event.get_message()) def on_reconnecting(self, event: "ServiceEvent"): - change_connection_status(False) + 
change_connection_status(ConnectionStatus.RECONNECTING) log.debug("Reconnecting - Error cause: %s", event.get_cause()) log.debug("Message: %s", event.get_message()) def on_service_interrupted(self, event: "ServiceEvent"): - change_connection_status(False) + change_connection_status(ConnectionStatus.DISCONNECTED) log.debug("Service interrupted - Error cause: %s", event.get_cause()) log.debug("Message: %s", event.get_message()) @@ -123,7 +131,7 @@ def __init__(self, broker_properties: dict): # set_python_solace_log_level("DEBUG") def __del__(self): - change_connection_status(False) + change_connection_status(ConnectionStatus.DISCONNECTED) self.disconnect() def connect(self): @@ -203,10 +211,14 @@ def connect(self): # Blocking connect thread result = self.messaging_service.connect_async() + while not result.done(): + log.debug("Connecting to broker...") + time.sleep(5) + if result.result() is None: log.error("Failed to connect to broker") return False - change_connection_status(True) + change_connection_status(ConnectionStatus.CONNECTED) # Event Handling for the messaging service self.service_handler = ServiceEventHandler() @@ -283,12 +295,12 @@ def bind_to_queue( def disconnect(self): try: self.messaging_service.disconnect() - change_connection_status(False) + change_connection_status(ConnectionStatus.DISCONNECTED) except Exception as exception: # pylint: disable=broad-except log.debug("Error disconnecting: %s", exception) - def is_connected(self): - return _is_connected + def get_connection_status(self): + return _connection_status def send_message( self, diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index e3ef75d8..36460c77 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -2,6 +2,8 @@ from enum import Enum from threading import Lock +from ..common.messaging.solace_messaging import ConnectionStatus + class Metrics(Enum): SOLCLIENT_STATS_RX_ACKED = "SOLCLIENT_STATS_RX_ACKED" @@ -33,8 +35,6 @@ class Monitoring: _instance = None _initialized = False - _ready = False - _live = False _interval = 10 def __new__(cls, *args, **kwargs): @@ -80,14 +80,6 @@ def set_required_metrics(self, required_metrics: List[Metrics]) -> None: """ self._required_metrics = [metric for metric in required_metrics] - def is_connected(self) -> int: - """ - Get the connection status of the broker. - - :return: Connection status - """ - return 1 if self._live and self._ready else 0 - def set_interval(self, interval: int) -> None: """ Set the interval for the MetricCollector. @@ -116,15 +108,18 @@ def get_connection_status(self) -> int: """ Get the connection status of the broker. 
""" - status = 1 + status = ConnectionStatus.CONNECTED for _, value in self._connection_status.items(): # if a module is disconnected, the status is disconnected - if value == 0: - status = 0 + if value == ConnectionStatus.DISCONNECTED: + status = ConnectionStatus.DISCONNECTED break # if a module is connecting, the status is connecting - if status == 1 and value == 2: - status = 2 + if ( + status == ConnectionStatus.CONNECTED + and value == ConnectionStatus.RECONNECTING + ): + status = ConnectionStatus.RECONNECTING return value diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 76119282..017ec7cf 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -10,6 +10,7 @@ from ..common.utils import get_source_expression from ..transforms.transforms import Transforms from ..common.message import Message +from ..common.messaging.solace_messaging import ConnectionStatus from ..common.trace_message import TraceMessage from ..common.event import Event, EventType from ..flow.request_response_flow_controller import RequestResponseFlowController @@ -81,7 +82,9 @@ def create_thread_and_run(self): def run(self): # Start the micro monitoring thread monitoring_thread = threading.Thread(target=self.run_micro_monitoring) - connection_status_thread = threading.Thread(target=self.get_connection_status) + connection_status_thread = threading.Thread( + target=self.run_connection_status_monitoring + ) monitoring_thread.start() connection_status_thread.start() # Process events until the stop signal is set @@ -510,10 +513,10 @@ def get_metrics_with_header(self) -> dict[dict[Metrics, Any], Any]: def get_metrics(self) -> dict[Metrics, Any]: return {} - def is_connected(self) -> int: + def get_connection_status(self) -> ConnectionStatus: pass - def get_connection_status(self) -> None: + def run_connection_status_monitoring(self) -> None: """ Get connection status """ @@ -528,7 +531,7 @@ def get_connection_status(self) -> None: ("component_index", self.component_index), ] ) - value = {"value": self.is_connected()} + value = {"value": self.get_connection_status()} print(key, value) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 76531510..71e5896a 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -7,6 +7,7 @@ from ..component_base import ComponentBase from ...common.message import Message +from ...common.messaging.solace_messaging import ConnectionStatus from ...common.messaging.messaging_builder import MessagingServiceBuilder from ...common.utils import encode_payload, decode_payload @@ -43,7 +44,7 @@ def __init__(self, module_info, **kwargs): ) self.current_broker_message = None self.messages_to_ack = [] - self.connected = False + self.connected = ConnectionStatus.DISCONNECTED self.needs_acknowledgement = True self.connection_repeat_sleep_time = 5 @@ -52,14 +53,14 @@ def invoke(self, message, data): pass def connect(self): - if not self.connected: + if self.connected == ConnectionStatus.DISCONNECTED: self.messaging_service.connect() - self.connected = self.messaging_service.is_connected() + self.connected = ConnectionStatus.CONNECTED def disconnect(self): - if self.connected: + if self.connected == ConnectionStatus.CONNECTED: self.messaging_service.disconnect() - 
self.connected = self.messaging_service.is_connected() + self.connected = ConnectionStatus.DISCONNECTED def stop_component(self): self.disconnect() diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index c0923f4f..2a0f115f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -138,8 +138,8 @@ def get_acknowledgement_callback(self): current_broker_message = self.current_broker_message return lambda: self.acknowledge_message(current_broker_message) - def is_connected(self): - return 1 if self.messaging_service.is_connected() else 0 + def get_connection_status(self): + return self.messaging_service.get_connection_status() def get_metrics(self): required_metrics = [ From b30c99b9af017cc109f0d7ab6a1ca0d94e746ed6 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 11:26:43 -0500 Subject: [PATCH 39/54] feat: add reconnecting log and handled signals --- .../common/messaging/messaging_builder.py | 6 +++-- .../common/messaging/solace_messaging.py | 26 ++++++++++++++----- .../components/inputs_outputs/broker_base.py | 4 ++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/messaging_builder.py b/src/solace_ai_connector/common/messaging/messaging_builder.py index 826cdd45..10576d19 100644 --- a/src/solace_ai_connector/common/messaging/messaging_builder.py +++ b/src/solace_ai_connector/common/messaging/messaging_builder.py @@ -6,10 +6,12 @@ # Make a Messaging Service builder - this is a factory for Messaging Service objects class MessagingServiceBuilder: - def __init__(self, flow_lock_manager, flow_kv_store): + + def __init__(self, flow_lock_manager, flow_kv_store, stop_signal): self.broker_properties = {} self.flow_lock_manager = flow_lock_manager self.flow_kv_store = flow_kv_store + self.stop_signal = stop_signal def from_properties(self, broker_properties: dict): self.broker_properties = broker_properties @@ -17,7 +19,7 @@ def from_properties(self, broker_properties: dict): def build(self): if self.broker_properties["broker_type"] == "solace": - return SolaceMessaging(self.broker_properties) + return SolaceMessaging(self.broker_properties, self.stop_signal) elif self.broker_properties["broker_type"] == "dev_broker": return DevBroker( self.broker_properties, self.flow_lock_manager, self.flow_kv_store diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 91204417..1fcbfda9 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -89,6 +89,9 @@ class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): + def __init__(self, stop_signal): + self.stop_signal = stop_signal + def on_reconnected(self, service_event: ServiceEvent): change_connection_status(ConnectionStatus.CONNECTED) log.debug("Reconnected to broker: %s", service_event.get_cause()) @@ -96,8 +99,18 @@ def on_reconnected(self, service_event: ServiceEvent): def on_reconnecting(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.RECONNECTING) - log.debug("Reconnecting - Error cause: %s", event.get_cause()) - log.debug("Message: %s", event.get_message()) + + def log_reconnecting(): + while ( + not self.stop_signal.is_set() + and _connection_status 
== ConnectionStatus.RECONNECTING + ): + log.debug("Reconnecting to broker: %s", event.get_cause()) + log.debug("Message: %s", event.get_message()) + self.stop_signal.wait(timeout=1) + + log_thread = threading.Thread(target=log_reconnecting) + log_thread.start() def on_service_interrupted(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.DISCONNECTED) @@ -118,13 +131,14 @@ def set_python_solace_log_level(level: str): # Create SolaceMessaging class inheriting from Messaging class SolaceMessaging(Messaging): - def __init__(self, broker_properties: dict): + def __init__(self, broker_properties: dict, stop_signal): super().__init__(broker_properties) self.persistent_receivers = [] self.messaging_service = None self.service_handler = None self.publisher = None self.persistent_receiver: PersistentMessageReceiver = None + self.stop_signal = stop_signal # MessagingService.set_core_messaging_log_level( # level="DEBUG", file="/home/efunnekotter/core.log" # ) @@ -211,9 +225,9 @@ def connect(self): # Blocking connect thread result = self.messaging_service.connect_async() - while not result.done(): + while not (self.stop_signal.is_set() or result.done()): log.debug("Connecting to broker...") - time.sleep(5) + self.stop_signal.wait(timeout=1) if result.result() is None: log.error("Failed to connect to broker") @@ -221,7 +235,7 @@ def connect(self): change_connection_status(ConnectionStatus.CONNECTED) # Event Handling for the messaging service - self.service_handler = ServiceEventHandler() + self.service_handler = ServiceEventHandler(self.stop_signal) self.messaging_service.add_reconnection_listener(self.service_handler) self.messaging_service.add_reconnection_attempt_listener(self.service_handler) self.messaging_service.add_service_interruption_listener(self.service_handler) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 71e5896a..37d5c22f 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -38,7 +38,9 @@ def __init__(self, module_info, **kwargs): self.broker_properties = self.get_broker_properties() if self.broker_properties["broker_type"] not in ["test", "test_streaming"]: self.messaging_service = ( - MessagingServiceBuilder(self.flow_lock_manager, self.flow_kv_store) + MessagingServiceBuilder( + self.flow_lock_manager, self.flow_kv_store, self.stop_signal + ) .from_properties(self.broker_properties) .build() ) From a64a48b6e29651073dadc867acc002ba99e39515 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 11:43:27 -0500 Subject: [PATCH 40/54] fix: update status --- src/solace_ai_connector/common/monitoring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 36460c77..7b592142 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -121,7 +121,7 @@ def get_connection_status(self) -> int: ): status = ConnectionStatus.RECONNECTING - return value + return status def collect_metrics(self, metrics: dict[Metrics, dict[Metrics, Any]]) -> None: """ From 7467b8f912d01c83c0940a21d2a79ded83d398f6 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 12:19:52 -0500 Subject: [PATCH 41/54] fix: update log --- .../common/messaging/solace_messaging.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 1fcbfda9..9f12a765 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -94,8 +94,8 @@ def __init__(self, stop_signal): def on_reconnected(self, service_event: ServiceEvent): change_connection_status(ConnectionStatus.CONNECTED) - log.debug("Reconnected to broker: %s", service_event.get_cause()) - log.debug("Message: %s", service_event.get_message()) + log.error("Reconnected to broker: %s", service_event.get_cause()) + log.error("Message: %s", service_event.get_message()) def on_reconnecting(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.RECONNECTING) @@ -105,8 +105,8 @@ def log_reconnecting(): not self.stop_signal.is_set() and _connection_status == ConnectionStatus.RECONNECTING ): - log.debug("Reconnecting to broker: %s", event.get_cause()) - log.debug("Message: %s", event.get_message()) + log.error("Reconnecting to broker: %s", event.get_cause()) + log.error("Message: %s", event.get_message()) self.stop_signal.wait(timeout=1) log_thread = threading.Thread(target=log_reconnecting) @@ -226,7 +226,7 @@ def connect(self): # Blocking connect thread result = self.messaging_service.connect_async() while not (self.stop_signal.is_set() or result.done()): - log.debug("Connecting to broker...") + log.info("Connecting to broker...") self.stop_signal.wait(timeout=1) if result.result() is None: From d49b4ef64d8e98e2ad4801e907c72a17de242079 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 12:33:02 -0500 Subject: [PATCH 42/54] fix: fix bug --- src/solace_ai_connector/components/component_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 017ec7cf..da8f86c4 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -532,9 +532,6 @@ def run_connection_status_monitoring(self) -> None: ] ) value = {"value": self.get_connection_status()} - - print(key, value) - self.monitoring.set_connection_status(key, value) # Wait 1 second for the next interval self.stop_signal.wait(timeout=1) From ae692489df4be7d3d65b6825770b7b3fd80e6fe2 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 12:35:31 -0500 Subject: [PATCH 43/54] fix: fix bug --- src/solace_ai_connector/common/messaging/solace_messaging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 9f12a765..c783646f 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -107,7 +107,7 @@ def log_reconnecting(): ): log.error("Reconnecting to broker: %s", event.get_cause()) log.error("Message: %s", event.get_message()) - self.stop_signal.wait(timeout=1) + self.stop_signal.wait(timeout=60) log_thread = threading.Thread(target=log_reconnecting) log_thread.start() @@ -227,7 +227,7 @@ def connect(self): result = self.messaging_service.connect_async() while not (self.stop_signal.is_set() or result.done()): log.info("Connecting to broker...") - self.stop_signal.wait(timeout=1) + self.stop_signal.wait(timeout=60) if result.result() is None: log.error("Failed to 
connect to broker") From 8a9ba5c29c63ae2d03269cdded31a046603d36bc Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 13:34:19 -0500 Subject: [PATCH 44/54] fix: resolve connection logs --- .../common/messaging/solace_messaging.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index c783646f..ad11bbcd 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -225,9 +225,14 @@ def connect(self): # Blocking connect thread result = self.messaging_service.connect_async() - while not (self.stop_signal.is_set() or result.done()): - log.info("Connecting to broker...") - self.stop_signal.wait(timeout=60) + + def log_connecting(): + while not (self.stop_signal.is_set() or result.done()): + log.info("Connecting to broker...") + self.stop_signal.wait(timeout=60) + + log_thread = threading.Thread(target=log_connecting) + log_thread.start() if result.result() is None: log.error("Failed to connect to broker") From 1bff59e95bb2f1fb871d0a12a4b5633c7534e8cc Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 10 Jan 2025 15:04:37 -0500 Subject: [PATCH 45/54] fix: handle threads --- .../common/messaging/solace_messaging.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index ad11bbcd..7009c941 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -226,10 +226,18 @@ def connect(self): # Blocking connect thread result = self.messaging_service.connect_async() + # log connection attempts + # note: the connection/reconnection handler API does not log connection attempts + self.stop_connection_log = threading.Event() + def log_connecting(): - while not (self.stop_signal.is_set() or result.done()): + while not ( + self.stop_signal.is_set() + or self.stop_connection_log.is_set() + or result.done() + ): log.info("Connecting to broker...") - self.stop_signal.wait(timeout=60) + self.stop_signal.wait(timeout=30) log_thread = threading.Thread(target=log_connecting) log_thread.start() @@ -237,6 +245,8 @@ def log_connecting(): if result.result() is None: log.error("Failed to connect to broker") return False + self.stop_connection_log.set() + change_connection_status(ConnectionStatus.CONNECTED) # Event Handling for the messaging service From 89ecf1d3a15f04712a94188abdc2000d51c6249e Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 13 Jan 2025 12:53:48 -0500 Subject: [PATCH 46/54] fix: update connection state machine --- src/solace_ai_connector/common/monitoring.py | 17 ++++++++++++----- .../components/component_base.py | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 7b592142..c9a79bc8 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -108,12 +108,14 @@ def get_connection_status(self) -> int: """ Get the connection status of the broker. 
""" - status = ConnectionStatus.CONNECTED + started = True + # default status is disconnected + status = ConnectionStatus.DISCONNECTED for _, value in self._connection_status.items(): - # if a module is disconnected, the status is disconnected - if value == ConnectionStatus.DISCONNECTED: - status = ConnectionStatus.DISCONNECTED - break + if started: + status = value + started = False + # if a module is connecting, the status is connecting if ( status == ConnectionStatus.CONNECTED @@ -121,6 +123,11 @@ def get_connection_status(self) -> int: ): status = ConnectionStatus.RECONNECTING + # if a module is disconnected, the status is disconnected + if value == ConnectionStatus.DISCONNECTED: + status = ConnectionStatus.DISCONNECTED + break + return status def collect_metrics(self, metrics: dict[Metrics, dict[Metrics, Any]]) -> None: diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index da8f86c4..742df768 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -531,7 +531,7 @@ def run_connection_status_monitoring(self) -> None: ("component_index", self.component_index), ] ) - value = {"value": self.get_connection_status()} + value = self.get_connection_status() self.monitoring.set_connection_status(key, value) # Wait 1 second for the next interval self.stop_signal.wait(timeout=1) From 2e0cb5f92a84c9f37ce585a12ab6e5dab98c32b6 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 14 Jan 2025 14:32:54 -0500 Subject: [PATCH 47/54] feat: add prefix to the broker logs --- .../common/messaging/messaging_builder.py | 7 +- .../common/messaging/solace_messaging.py | 64 +++++++++++++------ .../components/component_base.py | 2 +- .../components/inputs_outputs/broker_base.py | 6 +- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/messaging_builder.py b/src/solace_ai_connector/common/messaging/messaging_builder.py index 10576d19..439ba7b2 100644 --- a/src/solace_ai_connector/common/messaging/messaging_builder.py +++ b/src/solace_ai_connector/common/messaging/messaging_builder.py @@ -7,11 +7,12 @@ # Make a Messaging Service builder - this is a factory for Messaging Service objects class MessagingServiceBuilder: - def __init__(self, flow_lock_manager, flow_kv_store, stop_signal): + def __init__(self, flow_lock_manager, flow_kv_store, broker_name, stop_signal): self.broker_properties = {} self.flow_lock_manager = flow_lock_manager self.flow_kv_store = flow_kv_store self.stop_signal = stop_signal + self.broker_name = broker_name def from_properties(self, broker_properties: dict): self.broker_properties = broker_properties @@ -19,7 +20,9 @@ def from_properties(self, broker_properties: dict): def build(self): if self.broker_properties["broker_type"] == "solace": - return SolaceMessaging(self.broker_properties, self.stop_signal) + return SolaceMessaging( + self.broker_properties, self.broker_name, self.stop_signal + ) elif self.broker_properties["broker_type"] == "dev_broker": return DevBroker( self.broker_properties, self.flow_lock_manager, self.flow_kv_store diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 7009c941..2e28abfe 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -4,7 +4,6 @@ import os import certifi import threading -import 
time from enum import Enum from solace.messaging.messaging_service import ( @@ -89,13 +88,20 @@ class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): - def __init__(self, stop_signal): + def __init__(self, stop_signal, error_prefix=""): self.stop_signal = stop_signal + self.error_prefix = error_prefix def on_reconnected(self, service_event: ServiceEvent): change_connection_status(ConnectionStatus.CONNECTED) - log.error("Reconnected to broker: %s", service_event.get_cause()) - log.error("Message: %s", service_event.get_message()) + log.error( + f"{self.error_prefix} Reconnected to broker: %s", + service_event.get_cause(), + ) + log.error( + f"{self.error_prefix} Message: %s", + service_event.get_message(), + ) def on_reconnecting(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.RECONNECTING) @@ -105,8 +111,14 @@ def log_reconnecting(): not self.stop_signal.is_set() and _connection_status == ConnectionStatus.RECONNECTING ): - log.error("Reconnecting to broker: %s", event.get_cause()) - log.error("Message: %s", event.get_message()) + log.error( + f"{self.error_prefix} Reconnecting to broker: %s", + event.get_cause(), + ) + log.error( + f"{self.error_prefix} Message: %s", + event.get_message(), + ) self.stop_signal.wait(timeout=60) log_thread = threading.Thread(target=log_reconnecting) @@ -114,8 +126,11 @@ def log_reconnecting(): def on_service_interrupted(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.DISCONNECTED) - log.debug("Service interrupted - Error cause: %s", event.get_cause()) - log.debug("Message: %s", event.get_message()) + log.debug( + f"{self.error_prefix} Service interrupted - Error cause: %s", + event.get_cause(), + ) + log.debug(f"{self.error_prefix} Message: %s", event.get_message()) def set_python_solace_log_level(level: str): @@ -131,7 +146,7 @@ def set_python_solace_log_level(level: str): # Create SolaceMessaging class inheriting from Messaging class SolaceMessaging(Messaging): - def __init__(self, broker_properties: dict, stop_signal): + def __init__(self, broker_properties: dict, broker_name, stop_signal): super().__init__(broker_properties) self.persistent_receivers = [] self.messaging_service = None @@ -139,6 +154,7 @@ def __init__(self, broker_properties: dict, stop_signal): self.publisher = None self.persistent_receiver: PersistentMessageReceiver = None self.stop_signal = stop_signal + self.error_prefix = f"broker[{broker_name}]:" # MessagingService.set_core_messaging_log_level( # level="DEBUG", file="/home/efunnekotter/core.log" # ) @@ -172,7 +188,7 @@ def connect(self): retry_interval = self.broker_properties.get("retry_interval") if not retry_interval: log.warning( - "retry_interval not provided, using default value of 3000 milliseconds" + f"{self.error_prefix} retry_interval not provided, using default value of 3000 milliseconds" ) retry_interval = 3000 self.messaging_service = ( @@ -190,10 +206,14 @@ def connect(self): retry_count = self.broker_properties.get("retry_count") retry_wait = self.broker_properties.get("retry_wait") if not retry_count: - log.warning("retry_count not provided, using default value of 20") + log.warning( + f"{self.error_prefix} retry_count not provided, using default value of 20" + ) retry_count = 20 if not retry_wait: - log.warning("retry_wait not provided, using default value of 3000") + log.warning( + f"{self.error_prefix} retry_wait not provided, using default value of 3000" + ) retry_wait = 3000 self.messaging_service = ( 
MessagingService.builder() @@ -209,7 +229,7 @@ def connect(self): else: # default log.info( - "Using default reconnection strategy. 20 retries with 3000ms interval" + f"{self.error_prefix} Using default reconnection strategy. 20 retries with 3000ms interval" ) self.messaging_service = ( MessagingService.builder() @@ -236,21 +256,21 @@ def log_connecting(): or self.stop_connection_log.is_set() or result.done() ): - log.info("Connecting to broker...") + log.info(f"{self.error_prefix} Connecting to broker...") self.stop_signal.wait(timeout=30) log_thread = threading.Thread(target=log_connecting) log_thread.start() if result.result() is None: - log.error("Failed to connect to broker") + log.error(f"{self.error_prefix} Failed to connect to broker") return False self.stop_connection_log.set() change_connection_status(ConnectionStatus.CONNECTED) # Event Handling for the messaging service - self.service_handler = ServiceEventHandler(self.stop_signal) + self.service_handler = ServiceEventHandler(self.stop_signal, self.error_prefix) self.messaging_service.add_reconnection_listener(self.service_handler) self.messaging_service.add_reconnection_attempt_listener(self.service_handler) self.messaging_service.add_service_interruption_listener(self.service_handler) @@ -296,7 +316,7 @@ def bind_to_queue( self.persistent_receiver.start() log.debug( - "Persistent receiver started... Bound to Queue [%s] (Temporary: %s)", + f"{self.error_prefix} Persistent receiver started... Bound to Queue [%s] (Temporary: %s)", queue.get_name(), temporary, ) @@ -304,7 +324,7 @@ def bind_to_queue( # Handle API exception except PubSubPlusClientError as exception: log.warning( - "Error creating persistent receiver for queue [%s], %s", + f"{self.error_prefix} Error creating persistent receiver for queue [%s], %s", queue_name, exception, ) @@ -317,7 +337,7 @@ def bind_to_queue( for subscription in subscriptions: sub = TopicSubscription.of(subscription.get("topic")) self.persistent_receiver.add_subscription(sub) - log.debug("Subscribed to topic: %s", subscription) + log.debug(f"{self.error_prefix} Subscribed to topic: %s", subscription) return self.persistent_receiver @@ -326,7 +346,7 @@ def disconnect(self): self.messaging_service.disconnect() change_connection_status(ConnectionStatus.DISCONNECTED) except Exception as exception: # pylint: disable=broad-except - log.debug("Error disconnecting: %s", exception) + log.debug(f"{self.error_prefix} Error disconnecting: %s", exception) def get_connection_status(self): return _connection_status @@ -381,4 +401,6 @@ def ack_message(self, broker_message): if "_original_message" in broker_message: self.persistent_receiver.ack(broker_message["_original_message"]) else: - log.warning("Cannot acknowledge message: original Solace message not found") + log.warning( + f"{self.error_prefix} Cannot acknowledge message: original Solace message not found" + ) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 742df768..032f3636 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -521,7 +521,7 @@ def run_connection_status_monitoring(self) -> None: Get connection status """ try: - if self.config.get("component_module") == "broker_input": + if self.config.get("component_module") in {"broker_input", "broker_output"}: while not self.stop_signal.is_set(): key = tuple( [ diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py 
b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 37d5c22f..c2270783 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -39,7 +39,10 @@ def __init__(self, module_info, **kwargs): if self.broker_properties["broker_type"] not in ["test", "test_streaming"]: self.messaging_service = ( MessagingServiceBuilder( - self.flow_lock_manager, self.flow_kv_store, self.stop_signal + self.flow_lock_manager, + self.flow_kv_store, + self.name, + self.stop_signal, ) .from_properties(self.broker_properties) .build() @@ -48,7 +51,6 @@ def __init__(self, module_info, **kwargs): self.messages_to_ack = [] self.connected = ConnectionStatus.DISCONNECTED self.needs_acknowledgement = True - self.connection_repeat_sleep_time = 5 @abstractmethod def invoke(self, message, data): From 33e042fd73fa49c27897c8c518883795f69114a5 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 17 Jan 2025 08:44:49 -0500 Subject: [PATCH 48/54] fix: synchronize logs with connection attempts --- .../common/messaging/solace_messaging.py | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 2e28abfe..f5641da5 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -88,9 +88,14 @@ class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): - def __init__(self, stop_signal, error_prefix=""): + def __init__( + self, stop_signal, strategy, retry_count, retry_interval, error_prefix="" + ): self.stop_signal = stop_signal self.error_prefix = error_prefix + self.strategy = strategy + self.retry_count = retry_count + self.retry_interval = retry_interval def on_reconnected(self, service_event: ServiceEvent): change_connection_status(ConnectionStatus.CONNECTED) @@ -107,10 +112,21 @@ def on_reconnecting(self, event: "ServiceEvent"): change_connection_status(ConnectionStatus.RECONNECTING) def log_reconnecting(): + while ( not self.stop_signal.is_set() and _connection_status == ConnectionStatus.RECONNECTING ): + # update retry count + if self.strategy and self.strategy == "parametrized_retry": + if self.retry_count <= 0: + log.error( + f"{self.error_prefix} Reconnection attempts exhausted. Stopping..." 
+ ) + break + else: + self.retry_count -= 1 + log.error( f"{self.error_prefix} Reconnecting to broker: %s", event.get_cause(), @@ -119,7 +135,7 @@ def log_reconnecting(): f"{self.error_prefix} Message: %s", event.get_message(), ) - self.stop_signal.wait(timeout=60) + self.stop_signal.wait(timeout=self.retry_interval / 1000) log_thread = threading.Thread(target=log_reconnecting) log_thread.start() @@ -184,6 +200,8 @@ def connect(self): or "/usr/share/ca-certificates/mozilla/", } strategy = self.broker_properties.get("reconnection_strategy") + retry_interval = 3000 # default + retry_count = 20 # default if strategy and strategy == "forever_retry": retry_interval = self.broker_properties.get("retry_interval") if not retry_interval: @@ -204,25 +222,25 @@ def connect(self): ) elif strategy and strategy == "parametrized_retry": retry_count = self.broker_properties.get("retry_count") - retry_wait = self.broker_properties.get("retry_wait") + retry_interval = self.broker_properties.get("retry_wait") if not retry_count: log.warning( f"{self.error_prefix} retry_count not provided, using default value of 20" ) retry_count = 20 - if not retry_wait: + if not retry_interval: log.warning( f"{self.error_prefix} retry_wait not provided, using default value of 3000" ) - retry_wait = 3000 + retry_interval = 3000 self.messaging_service = ( MessagingService.builder() .from_properties(broker_props) .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(retry_count, retry_wait) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .with_connection_retry_strategy( - RetryStrategy.parametrized_retry(retry_count, retry_wait) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .build() ) @@ -235,10 +253,10 @@ def connect(self): MessagingService.builder() .from_properties(broker_props) .with_reconnection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .with_connection_retry_strategy( - RetryStrategy.parametrized_retry(20, 3000) + RetryStrategy.parametrized_retry(retry_count, retry_interval) ) .build() ) @@ -251,13 +269,24 @@ def connect(self): self.stop_connection_log = threading.Event() def log_connecting(): + temp_retry_count = retry_count while not ( self.stop_signal.is_set() or self.stop_connection_log.is_set() or result.done() ): + # update retry count + if strategy and strategy == "parametrized_retry": + if temp_retry_count <= 0: + log.error( + f"{self.error_prefix} Connection attempts exhausted. Stopping..." 
+ ) + break + else: + temp_retry_count -= 1 + log.info(f"{self.error_prefix} Connecting to broker...") - self.stop_signal.wait(timeout=30) + self.stop_signal.wait(timeout=retry_interval / 1000) log_thread = threading.Thread(target=log_connecting) log_thread.start() @@ -270,7 +299,9 @@ def log_connecting(): change_connection_status(ConnectionStatus.CONNECTED) # Event Handling for the messaging service - self.service_handler = ServiceEventHandler(self.stop_signal, self.error_prefix) + self.service_handler = ServiceEventHandler( + self.stop_signal, strategy, retry_count, retry_interval, self.error_prefix + ) self.messaging_service.add_reconnection_listener(self.service_handler) self.messaging_service.add_reconnection_attempt_listener(self.service_handler) self.messaging_service.add_service_interruption_listener(self.service_handler) From d8cfd1655bf8ba2e17bf8a7021e92751d78025cc Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 17 Jan 2025 08:53:28 -0500 Subject: [PATCH 49/54] fix: remove datadog dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2958e8dc..1d4cc9e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ dependencies = [ "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", - "datadog~=0.50.2", ] [project.urls] From 8d664d0da91b1468e253076cbec1511eeb745dfe Mon Sep 17 00:00:00 2001 From: alimosaed Date: Fri, 17 Jan 2025 15:44:43 -0500 Subject: [PATCH 50/54] fix: cover an exception --- src/solace_ai_connector/common/messaging/solace_messaging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index f5641da5..ce592794 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -245,10 +245,11 @@ def connect(self): .build() ) else: - # default + # set default log.info( f"{self.error_prefix} Using default reconnection strategy. 
20 retries with 3000ms interval" ) + strategy = "parametrized_retry" self.messaging_service = ( MessagingService.builder() .from_properties(broker_props) From 7cac805d93ad73673e8e345277c5ab837665c72d Mon Sep 17 00:00:00 2001 From: alimosaed Date: Mon, 20 Jan 2025 13:01:32 -0500 Subject: [PATCH 51/54] ref: upgrade to latest pubsub and replace a metric --- pyproject.toml | 2 +- requirements.txt | 2 +- src/solace_ai_connector/common/monitoring.py | 4 ++-- src/solace_ai_connector/components/component_base.py | 4 +--- .../components/general/llm/litellm/litellm_base.py | 2 +- .../components/inputs_outputs/broker_input.py | 2 +- .../components/inputs_outputs/broker_output.py | 7 +------ 7 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cec52eb0..b61270a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "boto3~=1.34.122", "PyYAML~=6.0.1", "Requests~=2.32.3", - "solace_pubsubplus>=1.8.0", + "solace_pubsubplus>=1.9.0", "Flask~=3.0.3", "Flask-SocketIO~=5.4.1", "build~=1.2.2.post1", diff --git a/requirements.txt b/requirements.txt index 58c44a0e..1383f3f8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ boto3~=1.34.122 PyYAML~=6.0.1 Requests~=2.32.3 -solace_pubsubplus~=1.8.0 +solace_pubsubplus~=1.9.0 Flask~=3.0.3 Flask-SocketIO~=5.4.1 build~=1.2.2.post1 diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index c9a79bc8..9219de63 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -6,7 +6,7 @@ class Metrics(Enum): - SOLCLIENT_STATS_RX_ACKED = "SOLCLIENT_STATS_RX_ACKED" + SOLCLIENT_STATS_RX_SETTLE_ACCEPTED = "SOLCLIENT_STATS_RX_SETTLE_ACCEPTED" SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS = ( "SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS" ) @@ -20,7 +20,7 @@ def get_type(metric: "Metrics") -> str: :return: Type of the metric """ if metric in [ - Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED, Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, ]: return "integer" diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index da6df791..66c31fa4 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -195,9 +195,7 @@ def process_post_invoke(self, result, message): # Finally send the message to the next component - or if this is the last component, # the component will override send_message and do whatever it needs to do with the message - log.debug( - "%sSending message from %s: %s", self.log_identifier, self.name, message - ) + log.debug("%sSending message from %s", self.log_identifier, self.name) self.send_message(message) @abstractmethod diff --git a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py index 6f1fdc62..f56a2fd8 100644 --- a/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py +++ b/src/solace_ai_connector/components/general/llm/litellm/litellm_base.py @@ -74,7 +74,7 @@ def load_balance(self, messages, stream): response = self.router.completion( model=self.load_balancer[0]["model_name"], messages=messages, stream=stream ) - log.debug("Load balancer response: %s", response) + log.debug("Load balancer responded") return response def invoke(self, message, data): diff --git 
a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 2a0f115f..0288e4ce 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -143,7 +143,7 @@ def get_connection_status(self): def get_metrics(self): required_metrics = [ - Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED, Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, ] stats_dict = {} diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_output.py b/src/solace_ai_connector/components/inputs_outputs/broker_output.py index 3188c422..27b3a38a 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_output.py @@ -128,12 +128,7 @@ def send_message(self, message: Message): log.info("Discarding message due to TTL expiration: %s", message) return - log.debug( - "Sending message to broker: topic=%s, user_properties=%s, payload=%s", - topic, - user_properties, - payload, - ) + log.debug("Sending message to broker: topic=%s", topic) user_context = None if self.propagate_acknowledgements: user_context = { From 7dfd11378579680574a293e21880f0687e9b0f0a Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 21 Jan 2025 12:04:38 -0500 Subject: [PATCH 52/54] ref: capsulate some variables --- .../common/messaging/solace_messaging.py | 57 +++++++++++++------ src/solace_ai_connector/common/monitoring.py | 2 +- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index ce592794..87919820 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -42,14 +42,9 @@ class ConnectionStatus(Enum): DISCONNECTED = 0 -_connection_status = ConnectionStatus.DISCONNECTED -_lock = threading.Lock() - - -def change_connection_status(status: ConnectionStatus): - global _connection_status - with _lock: - _connection_status = status +def change_connection_status(connection_properties: dict, status): + with connection_properties["lock"]: + connection_properties["status"] = status class MessageHandlerImpl(MessageHandler): @@ -89,16 +84,23 @@ class ServiceEventHandler( ): def __init__( - self, stop_signal, strategy, retry_count, retry_interval, error_prefix="" + self, + stop_signal, + strategy, + retry_count, + retry_interval, + connection_properties, + error_prefix="", ): self.stop_signal = stop_signal self.error_prefix = error_prefix self.strategy = strategy self.retry_count = retry_count self.retry_interval = retry_interval + self.connection_properties = connection_properties def on_reconnected(self, service_event: ServiceEvent): - change_connection_status(ConnectionStatus.CONNECTED) + change_connection_status(self.connection_properties, ConnectionStatus.CONNECTED) log.error( f"{self.error_prefix} Reconnected to broker: %s", service_event.get_cause(), @@ -109,13 +111,16 @@ def on_reconnected(self, service_event: ServiceEvent): ) def on_reconnecting(self, event: "ServiceEvent"): - change_connection_status(ConnectionStatus.RECONNECTING) + change_connection_status( + self.connection_properties, ConnectionStatus.RECONNECTING + ) def log_reconnecting(): while ( not self.stop_signal.is_set() - and _connection_status == ConnectionStatus.RECONNECTING + 
and self.connection_properties["status"] + == ConnectionStatus.RECONNECTING ): # update retry count if self.strategy and self.strategy == "parametrized_retry": @@ -141,7 +146,9 @@ def log_reconnecting(): log_thread.start() def on_service_interrupted(self, event: "ServiceEvent"): - change_connection_status(ConnectionStatus.DISCONNECTED) + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) log.debug( f"{self.error_prefix} Service interrupted - Error cause: %s", event.get_cause(), @@ -170,6 +177,11 @@ def __init__(self, broker_properties: dict, broker_name, stop_signal): self.publisher = None self.persistent_receiver: PersistentMessageReceiver = None self.stop_signal = stop_signal + self.connection_properties = { + "status": ConnectionStatus.DISCONNECTED, + "lock": threading.Lock(), + } + self.error_prefix = f"broker[{broker_name}]:" # MessagingService.set_core_messaging_log_level( # level="DEBUG", file="/home/efunnekotter/core.log" @@ -177,7 +189,9 @@ def __init__(self, broker_properties: dict, broker_name, stop_signal): # set_python_solace_log_level("DEBUG") def __del__(self): - change_connection_status(ConnectionStatus.DISCONNECTED) + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) self.disconnect() def connect(self): @@ -297,11 +311,16 @@ def log_connecting(): return False self.stop_connection_log.set() - change_connection_status(ConnectionStatus.CONNECTED) + change_connection_status(self.connection_properties, ConnectionStatus.CONNECTED) # Event Handling for the messaging service self.service_handler = ServiceEventHandler( - self.stop_signal, strategy, retry_count, retry_interval, self.error_prefix + self.stop_signal, + strategy, + retry_count, + retry_interval, + self.connection_properties, + self.error_prefix, ) self.messaging_service.add_reconnection_listener(self.service_handler) self.messaging_service.add_reconnection_attempt_listener(self.service_handler) @@ -376,12 +395,14 @@ def bind_to_queue( def disconnect(self): try: self.messaging_service.disconnect() - change_connection_status(ConnectionStatus.DISCONNECTED) + change_connection_status( + self.connection_properties, ConnectionStatus.DISCONNECTED + ) except Exception as exception: # pylint: disable=broad-except log.debug(f"{self.error_prefix} Error disconnecting: %s", exception) def get_connection_status(self): - return _connection_status + return self.connection_properties["status"] def send_message( self, diff --git a/src/solace_ai_connector/common/monitoring.py b/src/solace_ai_connector/common/monitoring.py index 9219de63..09a2af5c 100644 --- a/src/solace_ai_connector/common/monitoring.py +++ b/src/solace_ai_connector/common/monitoring.py @@ -182,7 +182,7 @@ def get_aggregated_metrics( metric_timestamp = value.timestamp if metric in [ - Metrics.SOLCLIENT_STATS_RX_ACKED, + Metrics.SOLCLIENT_STATS_RX_SETTLE_ACCEPTED, Metrics.SOLCLIENT_STATS_TX_TOTAL_CONNECTION_ATTEMPTS, ]: # add metrics that need to be aggregated by sum aggregated_metrics[new_key].value += sum(metric_value) From b98218989dec4043b058f31237e4edf2b3003774 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 21 Jan 2025 13:22:04 -0500 Subject: [PATCH 53/54] ref: enable daemon for threads to close them safely --- .../common/messaging/solace_messaging.py | 4 +-- .../components/component_base.py | 8 +++--- .../inputs_outputs/broker_request_response.py | 26 ++++++++++++------- .../inputs_outputs/websocket_output.py | 3 ++- src/solace_ai_connector/flow/timer_manager.py | 4 ++- 
.../services/cache_service.py | 23 +++++++++++----- .../solace_ai_connector.py | 2 +- 7 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 87919820..68bfb821 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -142,7 +142,7 @@ def log_reconnecting(): ) self.stop_signal.wait(timeout=self.retry_interval / 1000) - log_thread = threading.Thread(target=log_reconnecting) + log_thread = threading.Thread(target=log_reconnecting, daemon=True) log_thread.start() def on_service_interrupted(self, event: "ServiceEvent"): @@ -303,7 +303,7 @@ def log_connecting(): log.info(f"{self.error_prefix} Connecting to broker...") self.stop_signal.wait(timeout=retry_interval / 1000) - log_thread = threading.Thread(target=log_connecting) + log_thread = threading.Thread(target=log_connecting, daemon=True) log_thread.start() if result.result() is None: diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 66c31fa4..4f456bbf 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -75,15 +75,17 @@ def reset_sleep_time(self): self.event_message_repeat_sleep_time = 1 def create_thread_and_run(self): - self.thread = threading.Thread(target=self.run) + self.thread = threading.Thread(target=self.run, daemon=True) self.thread.start() return self.thread def run(self): # Start the micro monitoring thread - monitoring_thread = threading.Thread(target=self.run_micro_monitoring) + monitoring_thread = threading.Thread( + target=self.run_micro_monitoring, daemon=True + ) connection_status_thread = threading.Thread( - target=self.run_connection_status_monitoring + target=self.run_connection_status_monitoring, daemon=True ) monitoring_thread.start() connection_status_thread.start() diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py index 59d8bdcd..04d5a5da 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_request_response.py @@ -287,10 +287,12 @@ def setup_test_pass_through(self): def start_response_thread(self): if self.test_mode: self.response_thread = threading.Thread( - target=self.handle_test_pass_through + target=self.handle_test_pass_through, daemon=True ) else: - self.response_thread = threading.Thread(target=self.handle_responses) + self.response_thread = threading.Thread( + target=self.handle_responses, daemon=True + ) self.response_thread.start() def handle_responses(self): @@ -332,7 +334,9 @@ def process_response(self, broker_message): return streaming_complete_expression = None - metadata_json = get_data_value(user_properties, self.user_properties_reply_metadata_key, True) + metadata_json = get_data_value( + user_properties, self.user_properties_reply_metadata_key, True + ) if not metadata_json: log.error("Received response without metadata: %s", payload) return @@ -390,8 +394,12 @@ def process_response(self, broker_message): ) else: # Remove the metadata and reply topic from the user properties - remove_data_value(response["user_properties"], self.user_properties_reply_metadata_key) - remove_data_value(response["user_properties"], 
self.user_properties_reply_topic_key) + remove_data_value( + response["user_properties"], self.user_properties_reply_metadata_key + ) + remove_data_value( + response["user_properties"], self.user_properties_reply_topic_key + ) message = Message( payload=payload, @@ -431,9 +439,7 @@ def invoke(self, message, data): metadata = {"request_id": request_id, "response_topic": topic} existing_metadata_json = get_data_value( - data["user_properties"], - self.user_properties_reply_metadata_key, - True + data["user_properties"], self.user_properties_reply_metadata_key, True ) if existing_metadata_json: try: @@ -454,7 +460,9 @@ def invoke(self, message, data): metadata = [metadata] set_data_value( - data["user_properties"], self.user_properties_reply_metadata_key, json.dumps(metadata) + data["user_properties"], + self.user_properties_reply_metadata_key, + json.dumps(metadata), ) set_data_value( data["user_properties"], self.user_properties_reply_topic_key, topic diff --git a/src/solace_ai_connector/components/inputs_outputs/websocket_output.py b/src/solace_ai_connector/components/inputs_outputs/websocket_output.py index d7320645..7f69ac36 100644 --- a/src/solace_ai_connector/components/inputs_outputs/websocket_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/websocket_output.py @@ -30,6 +30,7 @@ class WebsocketOutput(WebsocketBase): + def __init__(self, **kwargs): super().__init__(info, **kwargs) self.payload_encoding = self.get_config("payload_encoding") @@ -38,7 +39,7 @@ def __init__(self, **kwargs): def run(self): if self.listen_port: - self.server_thread = threading.Thread(target=self.run_server) + self.server_thread = threading.Thread(target=self.run_server, daemon=True) self.server_thread.start() super().run() diff --git a/src/solace_ai_connector/flow/timer_manager.py b/src/solace_ai_connector/flow/timer_manager.py index 6b091a74..94a400f3 100644 --- a/src/solace_ai_connector/flow/timer_manager.py +++ b/src/solace_ai_connector/flow/timer_manager.py @@ -6,6 +6,7 @@ class Timer: + def __init__(self, expiration, interval, component, timer_id, payload=None): self.expiration = expiration self.interval = interval @@ -18,12 +19,13 @@ def __lt__(self, other): class TimerManager: + def __init__(self, stop_signal): self.timers = [] self.lock = threading.Lock() self.stop_signal = stop_signal self.event = threading.Event() - self.thread = threading.Thread(target=self.run) + self.thread = threading.Thread(target=self.run, daemon=True) self.thread.start() def add_timer(self, delay_ms, component, timer_id, interval_ms=None, payload=None): diff --git a/src/solace_ai_connector/services/cache_service.py b/src/solace_ai_connector/services/cache_service.py index 0b0ff3f0..135d40d9 100644 --- a/src/solace_ai_connector/services/cache_service.py +++ b/src/solace_ai_connector/services/cache_service.py @@ -11,6 +11,7 @@ class CacheStorageBackend(ABC): + @abstractmethod def get(self, key: str, include_meta=False) -> Any: pass @@ -35,6 +36,7 @@ def get_all(self) -> Dict[str, Tuple[Any, Optional[Dict], Optional[float]]]: class InMemoryStorage(CacheStorageBackend): + def __init__(self): self.store: Dict[str, Dict[str, Any]] = {} self.lock = Lock() @@ -97,6 +99,7 @@ class CacheItem(Base): class SQLAlchemyStorage(CacheStorageBackend): + def __init__(self, connection_string: str): self.engine = create_engine(connection_string) Base.metadata.create_all(self.engine) @@ -112,12 +115,16 @@ def get(self, key: str, include_meta=False) -> Any: session.delete(item) session.commit() return None - if include_meta: + if 
include_meta: return { "value": pickle.loads(item.value), - "metadata": pickle.loads(item.item_metadata) if item.item_metadata else None, + "metadata": pickle.loads(item.item_metadata) + if item.item_metadata + else None, "expiry": item.expiry, - "component": self._get_component_from_reference(item.component_reference), + "component": self._get_component_from_reference( + item.component_reference + ), } return pickle.loads(item.value), ( pickle.loads(item.item_metadata) if item.item_metadata else None @@ -191,12 +198,15 @@ def _get_component_from_reference(self, reference): class CacheService: + def __init__(self, storage_backend: CacheStorageBackend): self.storage = storage_backend self.next_expiry = None self.expiry_event = threading.Event() self.stop_event = threading.Event() - self.expiry_thread = threading.Thread(target=self._expiry_check_loop) + self.expiry_thread = threading.Thread( + target=self._expiry_check_loop, daemon=True + ) self.expiry_thread.start() self.lock = Lock() @@ -269,11 +279,12 @@ def _check_expirations(self): self.storage.delete(key) self.next_expiry = next_expiry - + for key, metadata, component, value in expired_keys: if component: event = Event( - EventType.CACHE_EXPIRY, {"key": key, "metadata": metadata, "expired_data": value} + EventType.CACHE_EXPIRY, + {"key": key, "metadata": metadata, "expired_data": value}, ) component.enqueue(event) diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 23155ce8..1790ee80 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -166,7 +166,7 @@ def setup_trace(self): self.trace_queue = queue.Queue() # Start a new thread to handle trace messages self.trace_thread = threading.Thread( - target=self.handle_trace, args=(trace_file,) + target=self.handle_trace, args=(trace_file,), daemon=True ) self.trace_thread.start() From 9bfc6e6910c258457aa51588a6556f934b1d6233 Mon Sep 17 00:00:00 2001 From: alimosaed Date: Tue, 21 Jan 2025 14:44:30 -0500 Subject: [PATCH 54/54] ref: remove useless variable --- src/solace_ai_connector/components/inputs_outputs/broker_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 256cb09c..9866e8a6 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -101,7 +101,6 @@ def __init__(self, module_info, **kwargs): self.messages_to_ack = [] self.connected = ConnectionStatus.DISCONNECTED self.needs_acknowledgement = True - self.connection_repeat_sleep_time = 5 @abstractmethod def invoke(self, message, data):
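
Editor's note: for readers skimming the patches above, the following is a minimal, self-contained sketch of the per-instance connection-status pattern introduced by PATCH 52 in solace_messaging.py, where the former module-level _connection_status/_lock globals become a connection_properties dict owned by each messaging object and guarded by its own lock. The FakeMessaging class and the __main__ usage are illustrative stand-ins, not the connector's actual API.

import threading
from enum import Enum


class ConnectionStatus(Enum):
    CONNECTED = 1
    RECONNECTING = 2
    DISCONNECTED = 0


def change_connection_status(connection_properties: dict, status: ConnectionStatus) -> None:
    # Mutate the shared status under its lock so listener threads always see a consistent value.
    with connection_properties["lock"]:
        connection_properties["status"] = status


class FakeMessaging:
    """Illustrative stand-in for SolaceMessaging; owns its status instead of sharing a global."""

    def __init__(self) -> None:
        self.connection_properties = {
            "status": ConnectionStatus.DISCONNECTED,
            "lock": threading.Lock(),
        }

    def connect(self) -> None:
        change_connection_status(self.connection_properties, ConnectionStatus.CONNECTED)

    def get_connection_status(self) -> ConnectionStatus:
        return self.connection_properties["status"]


if __name__ == "__main__":
    service = FakeMessaging()
    service.connect()
    print(service.get_connection_status())  # ConnectionStatus.CONNECTED

With one status dict per SolaceMessaging instance, two brokers configured in the same connector no longer overwrite each other's connection state, which is the point of the "capsulate" refactor.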
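
Editor's note: PATCH 48 bounds the reconnect/connect log loops by a retry budget, and PATCH 53 marks the connector's helper threads as daemon threads that wait on a stop Event instead of sleeping. The sketch below combines both ideas under simplified assumptions (it always applies the countdown, whereas the patch only does so for the "parametrized_retry" strategy, and all names here are invented for illustration).

import threading

stop_signal = threading.Event()


def log_connecting(retry_count: int, retry_interval_ms: int) -> None:
    # Count down the remaining attempts and wait on the stop Event so that a
    # shutdown request interrupts the pause immediately instead of after a fixed sleep.
    attempts_left = retry_count
    while not stop_signal.is_set():
        if attempts_left <= 0:
            print("Connection attempts exhausted. Stopping...")
            break
        attempts_left -= 1
        print("Connecting to broker...")
        stop_signal.wait(timeout=retry_interval_ms / 1000)


log_thread = threading.Thread(target=log_connecting, args=(3, 100), daemon=True)
log_thread.start()
stop_signal.set()   # request shutdown; the wait() returns right away
log_thread.join()

Because the thread is a daemon and polls stop_signal, the process can exit promptly even if the broker never comes back, which is what the "enable daemon for threads to close them safely" commit is after.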
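
Editor's note: the broker_request_response.py hunks in PATCH 53 are formatting-only, but they show the reply-routing scheme: each request stamps a {request_id, response_topic} entry into a JSON-encoded metadata list carried in the message's user properties. The sketch below is a rough illustration of that idea, not the connector's exact logic; the property key names and the append order are assumptions, and the real component takes its keys from configuration.

import json
import uuid

# Hypothetical property keys, used here only for illustration.
REPLY_METADATA_KEY = "reply_metadata"
REPLY_TOPIC_KEY = "reply_topic"


def stamp_reply_metadata(user_properties: dict, response_topic: str) -> dict:
    """Append this hop's {request_id, response_topic} to any metadata already present."""
    entry = {"request_id": uuid.uuid4().hex, "response_topic": response_topic}

    existing_json = user_properties.get(REPLY_METADATA_KEY)
    try:
        metadata = json.loads(existing_json) if existing_json else []
        if not isinstance(metadata, list):
            metadata = [metadata]
    except json.JSONDecodeError:
        metadata = []

    metadata.append(entry)
    user_properties[REPLY_METADATA_KEY] = json.dumps(metadata)
    user_properties[REPLY_TOPIC_KEY] = response_topic
    return user_properties


if __name__ == "__main__":
    props = stamp_reply_metadata({}, "reply/topic/abc")
    print(props[REPLY_METADATA_KEY])

On the response path the component does the inverse: it parses the metadata JSON, pops the last entry to find where to route the reply, and strips the metadata and reply-topic properties before handing the message on.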