feat: privacy_mode (#164)
k11kirky authored Jan 15, 2025
1 parent 190c628 commit 7870ccd
Showing 9 changed files with 229 additions and 42 deletions.
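
This commit adds a privacy mode to PostHog's LLM observability. A new global posthog.privacy_mode flag, plus per-call posthog_privacy_mode / privacy_mode arguments on the OpenAI wrapper and the LangChain callback handler, redacts prompt and completion content ($ai_input / $ai_output) from captured events while keeping usage and metadata such as token counts and latency. The commit also threads an optional posthog_groups / groups dict through to capture() for group analytics.

A minimal sketch of the global opt-in (the per-call variants appear in the diffs below):

import posthog

# With privacy mode on, $ai_generation and $ai_embedding events keep usage
# and metadata (model, token counts, latency) but no prompt or output content.
posthog.privacy_mode = True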
llm_observability_examples.py (34 changes: 22 additions & 12 deletions)
@@ -9,6 +9,8 @@
 posthog.personal_api_key = os.getenv("POSTHOG_PERSONAL_API_KEY", "your-personal-api-key")
 posthog.host = os.getenv("POSTHOG_HOST", "http://localhost:8000")  # Or https://app.posthog.com
 posthog.debug = True
+# change this to False to see usage events
+# posthog.privacy_mode = True
 
 openai_client = OpenAI(
     api_key=os.getenv("OPENAI_API_KEY", "your-openai-api-key"),
@@ -26,11 +28,12 @@ def main_sync():
     print("Trace ID:", trace_id)
     distinct_id = "test2_distinct_id"
     properties = {"test_property": "test_value"}
+    groups = {"company": "test_company"}
 
     try:
-        basic_openai_call(distinct_id, trace_id, properties)
-        streaming_openai_call(distinct_id, trace_id, properties)
-        embedding_openai_call(distinct_id, trace_id, properties)
+        basic_openai_call(distinct_id, trace_id, properties, groups)
+        streaming_openai_call(distinct_id, trace_id, properties, groups)
+        embedding_openai_call(distinct_id, trace_id, properties, groups)
         image_openai_call()
     except Exception as e:
         print("Error during OpenAI call:", str(e))
@@ -41,17 +44,18 @@ async def main_async():
     print("Trace ID:", trace_id)
     distinct_id = "test_distinct_id"
     properties = {"test_property": "test_value"}
+    groups = {"company": "test_company"}
 
     try:
-        await basic_async_openai_call(distinct_id, trace_id, properties)
-        await streaming_async_openai_call(distinct_id, trace_id, properties)
-        await embedding_async_openai_call(distinct_id, trace_id, properties)
+        await basic_async_openai_call(distinct_id, trace_id, properties, groups)
+        await streaming_async_openai_call(distinct_id, trace_id, properties, groups)
+        await embedding_async_openai_call(distinct_id, trace_id, properties, groups)
         await image_async_openai_call()
     except Exception as e:
         print("Error during OpenAI call:", str(e))
 
 
-def basic_openai_call(distinct_id, trace_id, properties):
+def basic_openai_call(distinct_id, trace_id, properties, groups):
     response = openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -63,6 +67,7 @@ def basic_openai_call(distinct_id, trace_id, properties):
         posthog_distinct_id=distinct_id,
         posthog_trace_id=trace_id,
         posthog_properties=properties,
+        posthog_groups=groups,
     )
     print(response)
     if response and response.choices:
@@ -72,7 +77,7 @@ def basic_openai_call(distinct_id, trace_id, properties):
     return response
 
 
-async def basic_async_openai_call(distinct_id, trace_id, properties):
+async def basic_async_openai_call(distinct_id, trace_id, properties, groups):
     response = await async_openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -84,6 +89,7 @@ async def basic_async_openai_call(distinct_id, trace_id, properties):
         posthog_distinct_id=distinct_id,
         posthog_trace_id=trace_id,
         posthog_properties=properties,
+        posthog_groups=groups,
     )
     if response and hasattr(response, "choices"):
         print("OpenAI response:", response.choices[0].message.content)
@@ -92,7 +98,7 @@ async def basic_async_openai_call(distinct_id, trace_id, properties):
     return response
 
 
-def streaming_openai_call(distinct_id, trace_id, properties):
+def streaming_openai_call(distinct_id, trace_id, properties, groups):
 
     response = openai_client.chat.completions.create(
         model="gpt-4o-mini",
@@ -106,6 +112,7 @@ def streaming_openai_call(distinct_id, trace_id, properties):
         posthog_distinct_id=distinct_id,
         posthog_trace_id=trace_id,
         posthog_properties=properties,
+        posthog_groups=groups,
     )
 
     for chunk in response:
@@ -115,7 +122,7 @@ def streaming_openai_call(distinct_id, trace_id, properties):
     return response
 
 
-async def streaming_async_openai_call(distinct_id, trace_id, properties):
+async def streaming_async_openai_call(distinct_id, trace_id, properties, groups):
     response = await async_openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -128,6 +135,7 @@ async def streaming_async_openai_call(distinct_id, trace_id, properties):
         posthog_distinct_id=distinct_id,
         posthog_trace_id=trace_id,
         posthog_properties=properties,
+        posthog_groups=groups,
     )
 
     async for chunk in response:
@@ -153,25 +161,27 @@ async def image_async_openai_call():
     return response
 
 
-def embedding_openai_call(posthog_distinct_id, posthog_trace_id, posthog_properties):
+def embedding_openai_call(posthog_distinct_id, posthog_trace_id, posthog_properties, posthog_groups):
     response = openai_client.embeddings.create(
         input="The hedgehog is cute",
         model="text-embedding-3-small",
         posthog_distinct_id=posthog_distinct_id,
         posthog_trace_id=posthog_trace_id,
         posthog_properties=posthog_properties,
+        posthog_groups=posthog_groups,
     )
     print(response)
     return response
 
 
-async def embedding_async_openai_call(posthog_distinct_id, posthog_trace_id, posthog_properties):
+async def embedding_async_openai_call(posthog_distinct_id, posthog_trace_id, posthog_properties, posthog_groups):
     response = await async_openai_client.embeddings.create(
         input="The hedgehog is cute",
         model="text-embedding-3-small",
         posthog_distinct_id=posthog_distinct_id,
         posthog_trace_id=posthog_trace_id,
         posthog_properties=posthog_properties,
+        posthog_groups=posthog_groups,
     )
     print(response)
     return response
posthog/__init__.py (2 changes: 2 additions & 0 deletions)
@@ -26,6 +26,8 @@
 exception_autocapture_integrations = []  # type: List[Integrations]
 # Used to determine in app paths for exception autocapture. Defaults to the current working directory
 project_root = None  # type: Optional[str]
+# Used for our AI observability feature to not capture any prompt or output just usage + metadata
+privacy_mode = False  # type: bool
 
 default_client = None  # type: Optional[Client]
 
posthog/ai/langchain/callbacks.py (16 changes: 12 additions & 4 deletions)
@@ -23,7 +23,7 @@
 from langchain_core.outputs import ChatGeneration, LLMResult
 from pydantic import BaseModel
 
-from posthog.ai.utils import get_model_params
+from posthog.ai.utils import get_model_params, with_privacy_mode
 from posthog.client import Client
 
 log = logging.getLogger("posthog")
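
Note that with_privacy_mode is imported from posthog.ai.utils, but its definition is not among the hunks shown on this page. Judging from the call sites below, which pass a client, a per-call flag, and a value, a plausible sketch is the following; the body is an assumption, not code from this commit:

from typing import Any

from posthog.client import Client


def with_privacy_mode(ph_client: Client, privacy_mode: bool, value: Any):
    # Redact the value when privacy mode is on, either globally on the client
    # or for this specific call; otherwise pass it through unchanged.
    # (That the client exposes a privacy_mode attribute is also an assumption.)
    if getattr(ph_client, "privacy_mode", False) or privacy_mode:
        return None
    return value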
@@ -69,18 +69,24 @@ def __init__(
         distinct_id: Optional[Union[str, int, float, UUID]] = None,
         trace_id: Optional[Union[str, int, float, UUID]] = None,
         properties: Optional[Dict[str, Any]] = None,
+        privacy_mode: bool = False,
+        groups: Optional[Dict[str, Any]] = None,
     ):
         """
         Args:
             client: PostHog client instance.
             distinct_id: Optional distinct ID of the user to associate the trace with.
             trace_id: Optional trace ID to use for the event.
             properties: Optional additional metadata to use for the trace.
+            privacy_mode: Whether to redact the input and output of the trace.
+            groups: Optional additional PostHog groups to use for the trace.
         """
         self._client = client
         self._distinct_id = distinct_id
         self._trace_id = trace_id
         self._properties = properties or {}
+        self._privacy_mode = privacy_mode
+        self._groups = groups or {}
         self._runs = {}
         self._parent_tree = {}
 
@@ -164,8 +170,8 @@ def on_llm_end(
             "$ai_provider": run.get("provider"),
             "$ai_model": run.get("model"),
             "$ai_model_parameters": run.get("model_params"),
-            "$ai_input": run.get("messages"),
-            "$ai_output": {"choices": output},
+            "$ai_input": with_privacy_mode(self._client, self._privacy_mode, run.get("messages")),
+            "$ai_output": with_privacy_mode(self._client, self._privacy_mode, {"choices": output}),
             "$ai_http_status": 200,
             "$ai_input_tokens": input_tokens,
             "$ai_output_tokens": output_tokens,
@@ -180,6 +186,7 @@ def on_llm_end(
             distinct_id=self._distinct_id or trace_id,
             event="$ai_generation",
             properties=event_properties,
+            groups=self._groups,
         )
 
     def on_chain_error(
@@ -212,7 +219,7 @@ def on_llm_error(
             "$ai_provider": run.get("provider"),
             "$ai_model": run.get("model"),
             "$ai_model_parameters": run.get("model_params"),
-            "$ai_input": run.get("messages"),
+            "$ai_input": with_privacy_mode(self._client, self._privacy_mode, run.get("messages")),
             "$ai_http_status": _get_http_status(error),
             "$ai_latency": latency,
             "$ai_trace_id": trace_id,
@@ -225,6 +232,7 @@ def on_llm_error(
             distinct_id=self._distinct_id or trace_id,
             event="$ai_generation",
             properties=event_properties,
+            groups=self._groups,
         )
 
     def _set_parent_of_run(self, run_id: UUID, parent_run_id: Optional[UUID] = None):
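
Taken together, the handler changes let callers opt into redaction and group analytics per handler. A hedged usage sketch follows; the CallbackHandler name, import path, and chain wiring are assumptions based on this file's location, not shown in the diff:

import posthog
from posthog.ai.langchain import CallbackHandler  # import path assumed

posthog_client = posthog.Posthog("phc_your_project_key", host="https://app.posthog.com")

callback = CallbackHandler(
    client=posthog_client,
    distinct_id="user_123",
    privacy_mode=True,               # redact $ai_input / $ai_output for this handler
    groups={"company": "acme_inc"},  # forwarded to capture() as PostHog groups
)

# chain.invoke({"question": "..."}, config={"callbacks": [callback]})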
posthog/ai/openai/openai.py (42 changes: 31 additions & 11 deletions)
@@ -8,7 +8,7 @@
 except ImportError:
     raise ModuleNotFoundError("Please install the OpenAI SDK to use this feature: 'pip install openai'")
 
-from posthog.ai.utils import call_llm_and_track_usage, get_model_params
+from posthog.ai.utils import call_llm_and_track_usage, get_model_params, with_privacy_mode
 from posthog.client import Client as PostHogClient
 
 
@@ -49,6 +49,8 @@ def create(
         posthog_distinct_id: Optional[str] = None,
         posthog_trace_id: Optional[str] = None,
         posthog_properties: Optional[Dict[str, Any]] = None,
+        posthog_privacy_mode: bool = False,
+        posthog_groups: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
         if posthog_trace_id is None:
@@ -59,6 +61,8 @@ def create(
                 posthog_distinct_id,
                 posthog_trace_id,
                 posthog_properties,
+                posthog_privacy_mode,
+                posthog_groups,
                 **kwargs,
             )
 
@@ -67,6 +71,8 @@ def create(
             self._client._ph_client,
             posthog_trace_id,
             posthog_properties,
+            posthog_privacy_mode,
+            posthog_groups,
             self._client.base_url,
             super().create,
             **kwargs,
@@ -77,6 +83,8 @@ def _create_streaming(
         posthog_distinct_id: Optional[str],
         posthog_trace_id: Optional[str],
         posthog_properties: Optional[Dict[str, Any]],
+        posthog_privacy_mode: bool,
+        posthog_groups: Optional[Dict[str, Any]],
         **kwargs: Any,
     ):
         start_time = time.time()
@@ -117,6 +125,8 @@ def generator():
                     posthog_distinct_id,
                     posthog_trace_id,
                     posthog_properties,
+                    posthog_privacy_mode,
+                    posthog_groups,
                     kwargs,
                     usage_stats,
                     latency,
@@ -130,6 +140,8 @@ def _capture_streaming_event(
         posthog_distinct_id: Optional[str],
         posthog_trace_id: Optional[str],
         posthog_properties: Optional[Dict[str, Any]],
+        posthog_privacy_mode: bool,
+        posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
         usage_stats: Dict[str, int],
         latency: float,
@@ -142,15 +154,19 @@ def _capture_streaming_event(
             "$ai_provider": "openai",
             "$ai_model": kwargs.get("model"),
             "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": kwargs.get("messages"),
-            "$ai_output": {
-                "choices": [
-                    {
-                        "content": output,
-                        "role": "assistant",
-                    }
-                ]
-            },
+            "$ai_input": with_privacy_mode(self._client._ph_client, posthog_privacy_mode, kwargs.get("messages")),
+            "$ai_output": with_privacy_mode(
+                self._client._ph_client,
+                posthog_privacy_mode,
+                {
+                    "choices": [
+                        {
+                            "content": output,
+                            "role": "assistant",
+                        }
+                    ]
+                },
+            ),
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
@@ -168,6 +184,7 @@ def _capture_streaming_event(
             distinct_id=posthog_distinct_id or posthog_trace_id,
             event="$ai_generation",
             properties=event_properties,
+            groups=posthog_groups,
         )
 
 
@@ -179,6 +196,8 @@ def create(
         posthog_distinct_id: Optional[str] = None,
         posthog_trace_id: Optional[str] = None,
         posthog_properties: Optional[Dict[str, Any]] = None,
+        posthog_privacy_mode: bool = False,
+        posthog_groups: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
         """
@@ -214,7 +233,7 @@ def create(
         event_properties = {
             "$ai_provider": "openai",
             "$ai_model": kwargs.get("model"),
-            "$ai_input": kwargs.get("input"),
+            "$ai_input": with_privacy_mode(self._client._ph_client, posthog_privacy_mode, kwargs.get("input")),
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_latency": latency,
@@ -232,6 +251,7 @@ def create(
             distinct_id=posthog_distinct_id or posthog_trace_id,
             event="$ai_embedding",
             properties=event_properties,
+            groups=posthog_groups,
         )
 
         return response
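
With the wrapper changes in place, a caller can redact a single request and attach groups roughly as follows. The import path and client construction are assumptions based on this repo's layout; the posthog_* keyword arguments are the ones added in this diff:

import os

from posthog.ai.openai import OpenAI  # import path assumed

# posthog module configured as in llm_observability_examples.py above;
# any extra PostHog wiring on the client sits in lines collapsed on this page.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# The $ai_generation event for this call keeps token usage and latency but
# drops $ai_input / $ai_output, and is associated with the given group.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Write a haiku about hedgehogs."}],
    posthog_distinct_id="user_123",
    posthog_privacy_mode=True,
    posthog_groups={"company": "acme_inc"},
)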