diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index afcf6b373a9..b978799b8bc 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -140,6 +140,7 @@ struct parquet_field_bool_list : public parquet_field_listgetb(); assert_bool_field_type(current_byte); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = current_byte == static_cast(FieldType::BOOLEAN_TRUE); }; bind_read_func(read_value); @@ -189,6 +190,7 @@ struct parquet_field_int_list : public parquet_field_list { parquet_field_int_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = cpr->get_zigzag(); }; this->bind_read_func(read_value); @@ -233,6 +235,7 @@ struct parquet_field_string_list : public parquet_field_listget_u32(); CUDF_EXPECTS(l < static_cast(cpr->m_end - cpr->m_cur), "string length mismatch"); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i].assign(reinterpret_cast(cpr->m_cur), l); cpr->m_cur += l; }; @@ -270,6 +273,7 @@ struct parquet_field_enum_list : public parquet_field_list : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = static_cast(cpr->get_i32()); }; this->bind_read_func(read_value); @@ -355,6 +359,7 @@ struct parquet_field_struct_list : public parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); cpr->read(&val[i]); }; this->bind_read_func(read_value); @@ -399,6 +404,7 @@ struct parquet_field_binary_list auto const l = cpr->get_u32(); CUDF_EXPECTS(l <= static_cast(cpr->m_end - cpr->m_cur), "binary length mismatch"); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i].resize(l); val[i].assign(cpr->m_cur, cpr->m_cur + l); cpr->m_cur += l; diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index e0f57df1368..058e884e08b 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -276,8 +276,10 @@ def read_csv( col_name = df._data.names[index] df._data[col_name] = df._data[col_name].astype(col_dtype) - if names is not None and len(names) and isinstance(names[0], (int)): + if names is not None and len(names) and isinstance(names[0], int): df.columns = [int(x) for x in df._data] + elif names is None and header == -1 and cudf.get_option("mode.pandas_compatible"): + df.columns = [int(x) for x in df._column_names] # Set index if the index_col parameter is passed if index_col is not None and index_col is not False: diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index cee3d23eadc..b6efc8ebd88 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -2269,3 +2269,11 @@ def test_read_compressed_BOM(tmpdir): f.write(buffer) assert_eq(pd.read_csv(fname), cudf.read_csv(fname)) + + +def test_read_header_none_pandas_compat_column_type(): + data = "1\n2\n" + with cudf.option_context("mode.pandas_compatible", True): + result = cudf.read_csv(StringIO(data), header=None).columns + expected = pd.read_csv(StringIO(data), header=None).columns + pd.testing.assert_index_equal(result, expected, exact=True)