Skip to content

Commit

Permalink
fix: Don't allow json inference method to be chunked/streaming
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 3, 2024
1 parent 0e1e0bd commit bd9c282
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
3 changes: 2 additions & 1 deletion crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,9 @@ impl StringNameSpace {

#[cfg(feature = "extract_jsonpath")]
pub fn json_decode(self, dtype: Option<DataType>, infer_schema_len: Option<usize>) -> Expr {
// Apply, because dtype should be inferred only once and be consistent over chunks/morsels.
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::JsonDecode {
.apply_private(FunctionExpr::StringExpr(StringFunction::JsonDecode {
dtype,
infer_schema_len,
}))
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/unit/datatypes/test_string.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import polars as pl
from polars.testing import assert_series_equal

Expand Down Expand Up @@ -28,3 +30,17 @@ def test_utf8_alias_lit() -> None:
result = pl.select(a=pl.lit(5, dtype=pl.Utf8)).to_series()
expected = pl.Series("a", ["5"], dtype=pl.String)
assert_series_equal(result, expected)


def test_json_decode_multiple_chunks() -> None:
    """Check ``str.json_decode`` on a column assembled from two concatenated frames.

    Each input frame holds a single JSON document in column ``"s"``: the first
    has ``x`` set to null, the second has ``x`` set to a boolean. After
    concatenating the frames and decoding the column, both rows must come back
    as the original objects.
    """
    documents = [{"x": None}, {"x": True}]

    # One single-row frame per document, so the concatenated column is built
    # from separate pieces (presumably separate chunks, per the test name).
    frames = [pl.Series([json.dumps(doc)]).to_frame("s") for doc in documents]
    combined = pl.concat(frames)

    decoded = combined.with_columns(pl.col("s").str.json_decode())
    observed = decoded.to_dict(as_series=False)

    assert observed == {"s": [{"x": None}, {"x": True}]}

0 comments on commit bd9c282

Please sign in to comment.