Skip to content

Commit

Permalink
Fix shuffle code to work with pyarrow 13 (#8009)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Jul 18, 2023
1 parent 2be7f35 commit b7e5f8f
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions distributed/shuffle/_worker_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,7 @@ def split_by_worker(
# bytestream such that it cannot be deserialized anymore
t = pa.Table.from_pandas(df, preserve_index=True)
t = t.sort_by("_worker")
codes = np.asarray(t.select(["_worker"]))[0]
codes = np.asarray(t["_worker"])
t = t.drop(["_worker"])
del df

Expand Down Expand Up @@ -983,7 +983,7 @@ def split_by_partition(t: pa.Table, column: str) -> dict[Any, pa.Table]:
partitions.sort()
t = t.sort_by(column)

partition = np.asarray(t.select([column]))[0]
partition = np.asarray(t[column])
splits = np.where(partition[1:] != partition[:-1])[0] + 1
splits = np.concatenate([[0], splits])

Expand Down

0 comments on commit b7e5f8f

Please sign in to comment.