Skip to content

Commit

Permalink
Merge pull request #10929 from rapidsai/branch-22.06
Browse files Browse the repository at this point in the history
[gpuCI] Forward-merge branch-22.06 to branch-22.08 [skip gpuci]
  • Loading branch information
GPUtester authored May 23, 2022
2 parents 0e74fca + 5067cc7 commit 58313f3
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 17 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def read_avro(
"`read_avro` does not yet support reading multiple files"
)

filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=filepath_or_buffer, compression=None, **kwargs
)
if compression is not None:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def read_csv(
"`read_csv` does not yet support reading multiple files"
)

filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=filepath_or_buffer,
compression=compression,
iotypes=(BytesIO, StringIO, NativeFile),
Expand Down
6 changes: 4 additions & 2 deletions python/cudf/cudf/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ def read_json(
source = ioutils.stringify_pathlike(source)
source = fs.sep.join([source, "*.json"])

tmp_source, compression = ioutils.get_filepath_or_buffer(
tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=source,
compression=compression,
iotypes=(BytesIO, StringIO),
allow_raw_text_input=True,
**kwargs,
)
if isinstance(tmp_source, list):
Expand Down Expand Up @@ -73,10 +74,11 @@ def read_json(
"multiple files via pandas"
)

path_or_buf, compression = ioutils.get_filepath_or_buffer(
path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=path_or_buf,
compression=compression,
iotypes=(BytesIO, StringIO),
allow_raw_text_input=True,
**kwargs,
)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def read_orc_statistics(
files_statistics = []
stripes_statistics = []
for source in filepaths_or_buffers:
filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=source, compression=None, **kwargs
)
if compression is not None:
Expand All @@ -182,7 +182,7 @@ def read_orc_statistics(
column_names,
raw_file_statistics,
raw_stripes_statistics,
) = liborc.read_raw_orc_statistics(filepath_or_buffer)
) = liborc.read_raw_orc_statistics(path_or_buf)

# Parse column names
column_names = [
Expand Down Expand Up @@ -323,7 +323,7 @@ def read_orc(
source = stringify_path(source)
source = fs.sep.join([source, "*.orc"])

tmp_source, compression = ioutils.get_filepath_or_buffer(
tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=source,
compression=None,
use_python_file_object=use_python_file_object,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def read_parquet(
fs=fs,
)
for i, source in enumerate(filepath_or_buffer):
tmp_source, compression = ioutils.get_filepath_or_buffer(
tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=source,
compression=None,
fs=fs,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def read_text(
):
"""{docstring}"""

filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
path_or_data=filepath_or_buffer,
compression=None,
iotypes=(BytesIO, StringIO),
Expand Down
26 changes: 18 additions & 8 deletions python/cudf/cudf/utils/ioutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ def _open_remote_files(
]


def get_filepath_or_buffer(
def get_reader_filepath_or_buffer(
path_or_data,
compression,
mode="rb",
Expand All @@ -1328,6 +1328,7 @@ def get_filepath_or_buffer(
byte_ranges=None,
use_python_file_object=False,
open_file_options=None,
allow_raw_text_input=False,
**kwargs,
):
"""Return either a filepath string to data, or a memory buffer of data.
Expand All @@ -1352,6 +1353,11 @@ def get_filepath_or_buffer(
open_file_options : dict, optional
Optional dictionary of key-word arguments to pass to
`_open_remote_files` (used for remote storage only).
allow_raw_text_input : boolean, default False
If True, the input `path_or_data` may be raw text rather than a path:
on a local filesystem, no error is raised when it does not resolve to an
existing file. If False, the input must be a path and an error will be
raised if it does not exist.
Returns
-------
Expand All @@ -1372,18 +1378,22 @@ def get_filepath_or_buffer(
if fs is None:
return path_or_data, compression

if len(paths) == 0:
raise FileNotFoundError(
f"{path_or_data} could not be resolved to any files"
)

if _is_local_filesystem(fs):
# `read_json` also accepts a raw JSON string, so
# path_or_data need not be a filepath-like string
if os.path.exists(paths[0]):
path_or_data = paths if len(paths) > 1 else paths[0]
if len(paths):
if fs.exists(paths[0]):
path_or_data = paths if len(paths) > 1 else paths[0]
elif not allow_raw_text_input:
raise FileNotFoundError(
f"{path_or_data} could not be resolved to any files"
)

else:
if len(paths) == 0:
raise FileNotFoundError(
f"{path_or_data} could not be resolved to any files"
)
if use_python_file_object:
path_or_data = _open_remote_files(
paths,
Expand Down

0 comments on commit 58313f3

Please sign in to comment.