Skip to content

Commit

Permalink
WIP: weave pii redaction config
Browse files: browse the repository at this point in the history
  • Loading branch information
niall-wb committed Jan 14, 2025
1 parent 0f59950 commit 724e91a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 19 deletions.
13 changes: 2 additions & 11 deletions weave/trace/pii_redaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@


def redact_pii(data):
if not should_redact_pii():
return data
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

def redact_value(value):
if isinstance(value, str):
# Analyze and anonymize the string
results = analyzer.analyze(text=value, language='en') # TODO: support more languages.
anonymized_text = anonymizer.anonymize(text=value, analyzer_results=results)
return anonymized_text.text
Expand All @@ -23,18 +22,10 @@ def redact_value(value):
else:
return value

# Initialize the Presidio Analyzer and Anonymizer
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

# Check if the input is a string or a dictionary
if isinstance(data, str):
# If it's a string, treat it as a JSON string and parse it
try:
data = json.loads(data)
except json.JSONDecodeError:
# If it's not valid JSON, just redact the string itself
return redact_value(data)

# Redact PII from the dictionary
return {k: redact_value(v) for k, v in data.items()}
24 changes: 16 additions & 8 deletions weave/trace/weave_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from weave.trace.sanitize import REDACTED_VALUE, should_redact
from weave.trace.serialize import from_json, isinstance_namedtuple, to_json
from weave.trace.serializer import get_serializer_for_obj
from weave.trace.settings import client_parallelism
from weave.trace.settings import client_parallelism, should_redact_pii
from weave.trace.table import Table
from weave.trace.util import deprecated, log_once
from weave.trace.vals import WeaveObject, WeaveTable, make_trace_obj
Expand Down Expand Up @@ -817,12 +817,17 @@ def create_call(
unbound_op = maybe_unbind_method(op)
op_def_ref = self._save_op(unbound_op)

inputs_redacted = redact_sensitive_keys(inputs)
inputs_pii_redacted = redact_pii(inputs_redacted)
inputs_sensitive_keys_redacted = redact_sensitive_keys(inputs)

if should_redact_pii():
prepared_inputs = redact_pii(inputs_sensitive_keys_redacted)
else:
prepared_inputs = inputs_sensitive_keys_redacted

if op.postprocess_inputs:
inputs_postprocessed = op.postprocess_inputs(inputs_pii_redacted)
inputs_postprocessed = op.postprocess_inputs(prepared_inputs)
else:
inputs_postprocessed = inputs_pii_redacted
inputs_postprocessed = prepared_inputs

if _global_postprocess_inputs:
inputs_postprocessed = _global_postprocess_inputs(inputs_postprocessed)
Expand Down Expand Up @@ -920,12 +925,15 @@ def finish_call(
call.ended_at = ended_at
original_output = output

redacted_output = redact_pii(original_output)
if should_redact_pii():
prepared_output = redact_pii(original_output)
else:
prepared_output = original_output

if op is not None and op.postprocess_output:
postprocessed_output = op.postprocess_output(redacted_output)
postprocessed_output = op.postprocess_output(prepared_output)
else:
postprocessed_output = redacted_output
postprocessed_output = prepared_output

if _global_postprocess_output:
postprocessed_output = _global_postprocess_output(postprocessed_output)
Expand Down

0 comments on commit 724e91a

Please sign in to comment.