Skip to content

Commit

Permalink
style: pre-commit fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
pre-commit-ci[bot] committed Jan 22, 2024
1 parent ebd11a4 commit 8686679
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 20 deletions.
15 changes: 4 additions & 11 deletions src/awkward/operations/ak_to_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,6 @@ def parquet_columns(specifier, only=None):
fs, destination = fsspec.core.url_to_fs(destination, **(storage_options or {}))
metalist = []


with pyarrow_parquet.ParquetWriter(
destination,
table.schema,
Expand All @@ -396,6 +395,7 @@ def parquet_columns(specifier, only=None):
) as writer:
if iter:
from pyarrow import Table

iterator = batch_iterator(
layout,
list_to32,
Expand Down Expand Up @@ -440,7 +440,6 @@ def write_metadata(dir_path, fs, *metas, global_metadata=True):
md.write_metadata_file(fil)



def batch_iterator(
layout,
list_to32,
Expand All @@ -455,9 +454,7 @@ def batch_iterator(

pyarrow = awkward._connect.pyarrow.import_pyarrow("ak.to_parquet")

if isinstance(
ak.operations.type(layout), ak.types.RecordType
):
if isinstance(ak.operations.type(layout), ak.types.RecordType):
names = layout.keys()
contents = [layout[name] for name in names]
else:
Expand All @@ -481,11 +478,7 @@ def batch_iterator(
)
pa_fields.append(
pyarrow.field(name, pa_arrays[-1].type).with_nullable(
isinstance(
ak.operations.type(content), ak.types.OptionType
)
isinstance(ak.operations.type(content), ak.types.OptionType)
)
)
yield pyarrow.RecordBatch.from_arrays(
pa_arrays, schema=pyarrow.schema(pa_fields)
)
yield pyarrow.RecordBatch.from_arrays(pa_arrays, schema=pyarrow.schema(pa_fields))
23 changes: 14 additions & 9 deletions src/awkward/operations/ak_to_parquet_row_groups.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@

from __future__ import annotations

from awkward import ak_to_parquet
from awkward._dispatch import high_level_function

# Public API of this module. Every name listed here must be defined in this
# file, otherwise `from <module> import *` raises AttributeError.
__all__ = ("to_parquet_row_groups",)

Check failure on line 6 in src/awkward/operations/ak_to_parquet_row_groups.py

View workflow job for this annotation

GitHub Actions / Run PyLint

Undefined variable name 'to_parquet' in __all__


@high_level_function()
def to_parquet_row_groups(
array_iterator, # good or bad name?
array_iterator, # good or bad name?
destination,
*,
list_to32=False,
Expand Down Expand Up @@ -170,9 +170,9 @@ def to_parquet_row_groups(
# Dispatch
yield (array_iterator,)

# If the input is an iterator, then should row-group size still be
# an option? Or should there be a check to determine if it's the same?
# seems like it should be set based on the iterator or something
# TODO: When the input is an iterator, should `row_group_size` still be an
# option? Either check that it agrees with the size of the batches the
# iterator yields, or derive it from the iterator instead of accepting it here.

return ak_to_parquet._impl(
array_iterator,
Expand Down Expand Up @@ -202,16 +202,21 @@ def to_parquet_row_groups(
iter=True,
)

import awkward as ak
from skhep_testdata import data_path

import uproot
from skhep_testdata import data_path

import awkward as ak

path = "/Users/zobil/Documents/awkward/tests/samples/array.parquet"

iterator = uproot.iterate(uproot.open(data_path("uproot-HZZ.root"))['events'], step_size=10)
iterator = uproot.iterate(
uproot.open(data_path("uproot-HZZ.root"))["events"], step_size=10
)

# ak.to_parquet(array, "/Users/zobil/Documents/awkward/tests/samples/array.parquet")
to_parquet_row_groups(iterator, path, row_group_size=10)

test = ak.from_parquet(path)

print(len(test['Jet_Px']))
print(len(test["Jet_Px"]))

0 comments on commit 8686679

Please sign in to comment.