Merge remote-tracking branch 'origin/master' into add_more_scorers

wandb · Oct 21, 2024 · 3450317 · 3450317
2 parents 2a0fd55 + fbee7c3
commit 3450317
Show file tree

Hide file tree

Showing 29 changed files with 616 additions and 109 deletions.
diff --git a/dev_docs/RELEASE.md b/dev_docs/RELEASE.md
@@ -12,7 +12,7 @@ This document outlines how to publish a new Weave release to our public [PyPI pa
    - Both of these commits will be pushed to master
    - Note: if you need to make a new minor or major release, you can do these steps manually with 2 PRs modifying the files in `weave/version.py` and `weave/pyproject.toml`
 
-4. Go to the [publish-pypi-release](https://github.com/wandb/weave/actions/workflows/release.yaml) GitHub action and trigger a new release from the `x.y.z` branch (NOT MASTER). For riskier releases, make sure `Use Test PyPI` is checked so we first publish a test package.
+4. Go to the [publish-pypi-release](https://github.com/wandb/weave/actions/workflows/release.yaml) GitHub action and trigger a new release from the `x.y.z` tag (NOT MASTER). For riskier releases, make sure `Use Test PyPI` is checked so we first publish a test package to [test.pypi.org](https://test.pypi.org/project/weave/#history).
 
 5. Verify the new version of Weave exists in [PyPI](https://pypi.org/project/weave/) once it is complete.
 

diff --git a/docs/docs/guides/core-types/imgs/audio-trace.png b/docs/docs/guides/core-types/imgs/audio-trace.png
diff --git a/docs/docs/guides/core-types/imgs/cat-pumpkin-trace.png b/docs/docs/guides/core-types/imgs/cat-pumpkin-trace.png
diff --git a/docs/docs/guides/core-types/media.md b/docs/docs/guides/core-types/media.md
@@ -0,0 +1,74 @@
+# Logging media
+
+Weave supports logging and displaying multiple first-class media types. Log images with `PIL.Image` and audio with `wave.Wave_read`, either directly with the object API or as the input or output of an op.
+
+## Images
+
+Logging type: `PIL.Image`. Here is an example of logging an image with the OpenAI DALL-E API:
+
+```python
+import weave
+from openai import OpenAI
+import requests
+from PIL import Image
+
+
+weave.init('image-example')
+client = OpenAI()
+
+@weave.op
+def generate_image(prompt: str) -> Image:
+    response = client.images.generate(
+        model="dall-e-3",
+        prompt=prompt,
+        size="1024x1024",
+        quality="standard",
+        n=1,
+    )
+    image_url = response.data[0].url
+    image_response = requests.get(image_url, stream=True)
+    image = Image.open(image_response.raw)
+
+    # return an Image.Image object to be logged as an image
+    return image
+
+generate_image("a cat with a pumpkin hat")
+```
+
+This image will be logged to Weave and automatically displayed in the UI. The following is the trace view for the example above.
+
+![Screenshot of pumpkin cat trace view](imgs/cat-pumpkin-trace.png)
+
+## Audio
+
+Logging type: `wave.Wave_read`. Here is an example of logging an audio file using OpenAI's speech generation API:
+
+```python
+import weave
+from openai import OpenAI
+import wave
+
+
+weave.init("audio-example")
+client = OpenAI()
+
+
+@weave.op
+def make_audio_file_streaming(text: str) -> wave.Wave_read:
+    with client.audio.speech.with_streaming_response.create(
+        model="tts-1",
+        voice="alloy",
+        input=text,
+        response_format="wav",
+    ) as res:
+        res.stream_to_file("output.wav")
+
+    # return a wave.Wave_read object to be logged as audio
+    return wave.open("output.wav")
+
+make_audio_file_streaming("Hello, how are you?")
+```
+
+This audio will be logged to Weave and automatically displayed in the UI with an audio player. The player can be expanded to view the raw audio waveform, and it also provides a download button.
+
+![Screenshot of audio trace view](imgs/audio-trace.png)
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
@@ -68,6 +68,7 @@ const sidebars: SidebarsConfig = {
         "guides/core-types/datasets",
         "guides/tracking/feedback",
         "guides/tracking/costs",
+        "guides/core-types/media",
         {
           type: "category",
           collapsible: true,

diff --git a/pyproject.toml b/pyproject.toml
@@ -191,7 +191,7 @@ module = "weave_query.*"
 ignore_errors = true
 
 [tool.bumpversion]
-current_version = "0.51.17-dev0"
+current_version = "0.51.18-dev0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.

diff --git a/tests/integrations/dspy/dspy_test.py b/tests/integrations/dspy/dspy_test.py
@@ -106,37 +106,21 @@ def test_dspy_inline_signatures(client: WeaveClient) -> None:
     call_1, _ = flattened_calls[0]
     assert call_1.exception is None and call_1.ended_at is not None
     output_1 = call_1.output
-    assert output_1 == {
-        "__class__": {
-            "module": "dspy.primitives.prediction",
-            "qualname": "Prediction",
-            "name": "Prediction",
-        },
-        "completions": {
-            "__class__": {
-                "module": "dspy.primitives.prediction",
-                "qualname": "Completions",
-                "name": "Completions",
-            },
-        },
-    }
+    assert (
+        output_1
+        == """Prediction(
+    sentiment='Positive'
+)"""
+    )
     call_2, _ = flattened_calls[1]
     assert call_2.exception is None and call_2.ended_at is not None
     output_2 = call_2.output
-    assert output_2 == {
-        "__class__": {
-            "module": "dspy.primitives.prediction",
-            "qualname": "Prediction",
-            "name": "Prediction",
-        },
-        "completions": {
-            "__class__": {
-                "module": "dspy.primitives.prediction",
-                "qualname": "Completions",
-                "name": "Completions",
-            },
-        },
-    }
+    assert (
+        output_2
+        == """Prediction(
+    sentiment='Positive'
+)"""
+    )
 
     call_3, _ = flattened_calls[2]
     assert call_3.exception is None and call_3.ended_at is not None

diff --git a/tests/integrations/google_ai_studio/google_ai_studio_test.py b/tests/integrations/google_ai_studio/google_ai_studio_test.py
@@ -65,8 +65,9 @@ def test_content_generation(client):
     trace_name = op_name_from_ref(call.op_name)
     assert trace_name == "google.generativeai.GenerativeModel.generate_content"
     assert call.output is not None
-    assert_correct_output_shape(call.output)
-    assert_correct_summary(call.summary, trace_name)
+    # TODO: Re-enable after dictify is fixed
+    # assert_correct_output_shape(call.output)
+    # assert_correct_summary(call.summary, trace_name)
 
 
 @pytest.mark.retry(max_attempts=5)
@@ -91,8 +92,9 @@ def test_content_generation_stream(client):
     trace_name = op_name_from_ref(call.op_name)
     assert trace_name == "google.generativeai.GenerativeModel.generate_content"
     assert call.output is not None
-    assert_correct_output_shape(call.output)
-    assert_correct_summary(call.summary, trace_name)
+    # TODO: Re-enable after dictify is fixed
+    # assert_correct_output_shape(call.output)
+    # assert_correct_summary(call.summary, trace_name)
 
 
 @pytest.mark.retry(max_attempts=5)
@@ -114,5 +116,6 @@ async def test_content_generation_async(client):
     trace_name = op_name_from_ref(call.op_name)
     assert trace_name == "google.generativeai.GenerativeModel.generate_content_async"
     assert call.output is not None
-    assert_correct_output_shape(call.output)
-    assert_correct_summary(call.summary, trace_name)
+    # TODO: Re-enable after dictify is fixed
+    # assert_correct_output_shape(call.output)
+    # assert_correct_summary(call.summary, trace_name)
diff --git a/tests/trace/test_client_trace.py b/tests/trace/test_client_trace.py
@@ -1506,24 +1506,10 @@ def op_with_unknown_types(a: MyUnknownClassA, b: float) -> MyUnknownClassB:
 
     assert len(inner_res.calls) == 1
     assert inner_res.calls[0].inputs == {
-        "a": {
-            "__class__": {
-                "module": "test_client_trace",
-                "qualname": "test_unknown_input_and_output_types.<locals>.MyUnknownClassA",
-                "name": "MyUnknownClassA",
-            },
-            "a_val": 3,
-        },
+        "a": repr(a),
         "b": 0.14,
     }
-    assert inner_res.calls[0].output == {
-        "__class__": {
-            "module": "test_client_trace",
-            "qualname": "test_unknown_input_and_output_types.<locals>.MyUnknownClassB",
-            "name": "MyUnknownClassB",
-        },
-        "b_val": 3.14,
-    }
+    assert inner_res.calls[0].output == repr(res)
 
 
 def test_unknown_attribute(client):
@@ -1547,22 +1533,8 @@ class MySerializableClass(weave.Object):
     a2 = weave.ref(ref_a.uri()).get()
     b2 = weave.ref(ref_b.uri()).get()
 
-    assert a2.obj == {
-        "__class__": {
-            "module": "test_client_trace",
-            "qualname": "test_unknown_attribute.<locals>.MyUnknownClass",
-            "name": "MyUnknownClass",
-        },
-        "a_val": 1,
-    }
-    assert b2.obj == {
-        "__class__": {
-            "module": "test_client_trace",
-            "qualname": "test_unknown_attribute.<locals>.MyUnknownClass",
-            "name": "MyUnknownClass",
-        },
-        "a_val": 2,
-    }
+    assert a2.obj == repr(a_obj)
+    assert b2.obj == repr(b_obj)
 
 
 @contextmanager

diff --git a/tests/trace/test_evaluations.py b/tests/trace/test_evaluations.py
@@ -768,9 +768,9 @@ def function_score(image, dc, model, obj, text, output) -> bool:
     # So this assertion is checking current state, but not
     # the correct behavior of the dataset (they should be the
     # MyDataclass, MyModel, and MyObj)
-    assert isinstance(row["dc"], dict)  #  MyDataclass
-    assert isinstance(row["model"], dict)  #  MyModel
-    assert isinstance(row["obj"], dict)  #  MyObj
+    assert isinstance(row["dc"], str)  #  MyDataclass
+    assert isinstance(row["model"], str)  #  MyModel
+    assert isinstance(row["obj"], str)  #  MyObj
     assert isinstance(row["text"], str)
 
     access_log = client.server.attribute_access_log

diff --git a/tests/trace/test_op_versioning.py b/tests/trace/test_op_versioning.py
@@ -1,3 +1,4 @@
+import re
 import typing
 
 import numpy as np
@@ -508,3 +509,46 @@ def some_d(v):
     print(saved_code)
 
     assert saved_code == EXPECTED_NO_REPEATS_CODE
+
+
+EXPECTED_INSTANCE_CODE = """import weave
+
+instance = "<test_op_versioning.test_op_instance.<locals>.MyClass object at 0x000000000>"
+
+@weave.op()
+def t(text: str):
+    print(instance._version)
+    return text
+"""
+
+
+def test_op_instance(client):
+    class MyClass:
+        _version: str
+        api_key: str
+
+        def __init__(self, secret: str) -> None:
+            self._version = "1.0.0"
+            self.api_key = secret
+
+    # We want to make sure this secret value is not saved in the code
+    instance = MyClass("sk-1234567890qwertyuiop")
+
+    @weave.op()
+    def t(text: str):
+        print(instance._version)
+        return text
+
+    t("hello")
+
+    ref = weave.obj_ref(t)
+    assert ref is not None
+
+    saved_code = get_saved_code(client, ref)
+    print("SAVED CODE")
+    print(saved_code)
+
+    # Instance address expected to change each run
+    clean_saved_code = re.sub(r"0x[0-9a-fA-F]+", "0x000000000", saved_code)
+
+    assert clean_saved_code == EXPECTED_INSTANCE_CODE
diff --git a/tests/trace/test_weave_client.py b/tests/trace/test_weave_client.py
@@ -666,6 +666,7 @@ def custom_obj_load(artifact, name):
     assert obj2.b == "x"
 
 
+@pytest.mark.skip(reason="Re-enable after dictify is fixed")
 def test_save_unknown_type(client):
     class SomeUnknownThing:
         def __init__(self, a):