Skip to content

Commit

Permalink
ARROW-10636: [Rust] Reformat code
Browse files Browse the repository at this point in the history
  • Loading branch information
GregBowyer committed Dec 12, 2020
1 parent f8f9749 commit aeda0f2
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 111 deletions.
11 changes: 8 additions & 3 deletions rust/parquet/src/arrow/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::data_type::{ByteArray, FixedLenByteArray, DataType, Int96};
use crate::data_type::{ByteArray, DataType, FixedLenByteArray, Int96};
// TODO: clean up imports (best done when there are few moving parts)
use arrow::array::{
Array, ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder, LargeBinaryBuilder,
Expand Down Expand Up @@ -57,8 +57,13 @@ impl FixedSizeArrayConverter {
}
}

impl Converter<Vec<Option<FixedLenByteArray>>, FixedSizeBinaryArray> for FixedSizeArrayConverter {
fn convert(&self, source: Vec<Option<FixedLenByteArray>>) -> Result<FixedSizeBinaryArray> {
impl Converter<Vec<Option<FixedLenByteArray>>, FixedSizeBinaryArray>
for FixedSizeArrayConverter
{
fn convert(
&self,
source: Vec<Option<FixedLenByteArray>>,
) -> Result<FixedSizeBinaryArray> {
let mut builder = FixedSizeBinaryBuilder::new(source.len(), self.byte_width);
for v in source {
match v {
Expand Down
12 changes: 4 additions & 8 deletions rust/parquet/src/column/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,7 @@
// under the License.

//! Contains column writer API.
use std::{
cmp,
marker::PhantomData,
collections::VecDeque,
convert::TryFrom,
sync::Arc
};
use std::{cmp, collections::VecDeque, convert::TryFrom, marker::PhantomData, sync::Arc};

use crate::basic::{Compression, Encoding, PageType, Type};
use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter};
Expand Down Expand Up @@ -974,7 +968,9 @@ fn fallback_encoding(kind: Type, props: &WriterProperties) -> Encoding {
(Type::INT32, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
(Type::INT64, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
(Type::BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
(Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
(Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => {
Encoding::DELTA_BYTE_ARRAY
}
_ => Encoding::PLAIN,
}
}
Expand Down
153 changes: 114 additions & 39 deletions rust/parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
//! Data types that connect Parquet physical types with their Rust-specific
//! representations.
use std::cmp::Ordering;
use std::ops::{Deref, DerefMut};
use std::mem;
use std::fmt;
use std::mem;
use std::ops::{Deref, DerefMut};
use std::str::from_utf8;

use byteorder::{BigEndian, ByteOrder};
Expand Down Expand Up @@ -50,7 +50,9 @@ impl Int96 {
/// Returns underlying data as slice of [`u32`].
#[inline]
pub fn data(&self) -> &[u32] {
self.value.as_ref().expect("set_data should have been called")
self.value
.as_ref()
.expect("set_data should have been called")
}

/// Sets data for this INT96 type.
Expand Down Expand Up @@ -158,7 +160,10 @@ impl ByteArray {
/// Returns slice of data.
#[inline]
pub fn data(&self) -> &[u8] {
self.data.as_ref().expect("set_data should have been called").as_ref()
self.data
.as_ref()
.expect("set_data should have been called")
.as_ref()
}

/// Set data from another byte buffer.
Expand All @@ -170,7 +175,12 @@ impl ByteArray {
/// Returns `ByteArray` instance with slice of values for a data.
#[inline]
pub fn slice(&self, start: usize, len: usize) -> Self {
Self::from(self.data.as_ref().expect("set_data should have been called").range(start, len))
Self::from(
self.data
.as_ref()
.expect("set_data should have been called")
.range(start, len),
)
}

pub fn as_utf8(&self) -> Result<&str> {
Expand Down Expand Up @@ -504,7 +514,7 @@ macro_rules! unimplemented_slice_as_bytes {
unimplemented!()
}
}
}
};
}

// TODO - Can Int96 and bool be implemented in these terms?
Expand Down Expand Up @@ -565,13 +575,13 @@ impl AsBytes for str {

pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
use crate::util::bit_util::{BitWriter, BitReader};
use crate::util::bit_util::{BitReader, BitWriter};
use crate::util::memory::ByteBufferPtr;

use byteorder::ByteOrder;
use std::convert::TryInto;

use super::{Result, ParquetError, SliceAsBytes};
use super::{ParquetError, Result, SliceAsBytes};

pub type BitIndex = u64;

Expand All @@ -580,7 +590,8 @@ pub(crate) mod private {
/// This is done to force the associated value type to be unimplementable outside of this
/// crate, and thus hint to the type system (and end user) traits are public for the contract
/// and not for extension.
pub trait ParquetValueType : std::cmp::PartialEq
pub trait ParquetValueType:
std::cmp::PartialEq
+ std::fmt::Debug
+ std::fmt::Display
+ std::default::Default
Expand All @@ -591,13 +602,24 @@ pub(crate) mod private {
+ PartialOrd
{
/// Encode the value directly from a higher level encoder
fn encode<W: std::io::Write>(values: &[Self], writer: &mut W, bit_writer: &mut BitWriter) -> Result<()>;
fn encode<W: std::io::Write>(
values: &[Self],
writer: &mut W,
bit_writer: &mut BitWriter,
) -> Result<()>;

/// Establish the data that will be decoded in a buffer
fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize);
fn set_data(
decoder: &mut PlainDecoderDetails,
data: ByteBufferPtr,
num_values: usize,
);

/// Decode the value from a given buffer for a higher level decoder
fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize>;
fn decode(
buffer: &mut [Self],
decoder: &mut PlainDecoderDetails,
) -> Result<usize>;

/// Return the encoded size for a type
fn dict_encoding_size(&self) -> (usize, usize) {
Expand Down Expand Up @@ -631,21 +653,32 @@ pub(crate) mod private {

impl ParquetValueType for bool {
#[inline]
fn encode<W: std::io::Write>(values: &[Self], _: &mut W, bit_writer: &mut BitWriter) -> Result<()> {
fn encode<W: std::io::Write>(
values: &[Self],
_: &mut W,
bit_writer: &mut BitWriter,
) -> Result<()> {
for value in values {
bit_writer.put_value(*value as u64, 1);
}
Ok(())
}

#[inline]
fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize) {
fn set_data(
decoder: &mut PlainDecoderDetails,
data: ByteBufferPtr,
num_values: usize,
) {
decoder.bit_reader.replace(BitReader::new(data));
decoder.num_values = num_values;
}

#[inline]
fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize> {
fn decode(
buffer: &mut [Self],
decoder: &mut PlainDecoderDetails,
) -> Result<usize> {
let bit_reader = decoder.bit_reader.as_mut().unwrap();
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
let values_read = bit_reader.get_batch(&mut buffer[..num_values], 1);
Expand Down Expand Up @@ -752,27 +785,44 @@ pub(crate) mod private {

impl ParquetValueType for super::Int96 {
#[inline]
fn encode<W: std::io::Write>(values: &[Self], writer: &mut W, _: &mut BitWriter) -> Result<()> {
fn encode<W: std::io::Write>(
values: &[Self],
writer: &mut W,
_: &mut BitWriter,
) -> Result<()> {
for value in values {
let raw = unsafe {
std::slice::from_raw_parts(value.data() as *const [u32] as *const u8, 12)
std::slice::from_raw_parts(
value.data() as *const [u32] as *const u8,
12,
)
};
writer.write_all(raw)?;
}
Ok(())
}

#[inline]
fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize) {
fn set_data(
decoder: &mut PlainDecoderDetails,
data: ByteBufferPtr,
num_values: usize,
) {
decoder.data.replace(data);
decoder.start = 0;
decoder.num_values = num_values;
}

#[inline]
fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize> {
fn decode(
buffer: &mut [Self],
decoder: &mut PlainDecoderDetails,
) -> Result<usize> {
// TODO - Remove the duplication between this and the general slice method
let data = decoder.data.as_ref().expect("set_data should have been called");
let data = decoder
.data
.as_ref()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
let bytes_left = data.len() - decoder.start;
let bytes_to_decode = 12 * num_values;
Expand Down Expand Up @@ -823,15 +873,20 @@ pub(crate) mod private {
macro_rules! read_num_bytes {
($ty:ty, $size:expr, $src:expr) => {{
assert!($size <= $src.len());
let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
let mut buffer =
<$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
<$ty>::from_ne_bytes(buffer)
}};
}

impl ParquetValueType for super::ByteArray {
#[inline]
fn encode<W: std::io::Write>(values: &[Self], writer: &mut W, _: &mut BitWriter) -> Result<()> {
fn encode<W: std::io::Write>(
values: &[Self],
writer: &mut W,
_: &mut BitWriter,
) -> Result<()> {
for value in values {
let len: u32 = value.len().try_into().unwrap();
writer.write_all(&len.to_ne_bytes())?;
Expand All @@ -842,28 +897,37 @@ pub(crate) mod private {
}

#[inline]
fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize) {
fn set_data(
decoder: &mut PlainDecoderDetails,
data: ByteBufferPtr,
num_values: usize,
) {
decoder.data.replace(data);
decoder.start = 0;
decoder.num_values = num_values;
}

#[inline]
fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize> {
let data = decoder.data.as_mut().expect("set_data should have been called");
fn decode(
buffer: &mut [Self],
decoder: &mut PlainDecoderDetails,
) -> Result<usize> {
let data = decoder
.data
.as_mut()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
for i in 0..num_values {
let len: usize = read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref()) as usize;
let len: usize =
read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>();

if data.len() < decoder.start + len {
return Err(eof_err!("Not enough bytes to decode"));
}

let val: &mut Self = buffer[i]
.as_mut_any()
.downcast_mut()
.unwrap();
let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();

val.set_data(data.range(decoder.start, len));
decoder.start += len;
Expand Down Expand Up @@ -891,7 +955,11 @@ pub(crate) mod private {

impl ParquetValueType for super::FixedLenByteArray {
#[inline]
fn encode<W: std::io::Write>(values: &[Self], writer: &mut W, _: &mut BitWriter) -> Result<()> {
fn encode<W: std::io::Write>(
values: &[Self],
writer: &mut W,
_: &mut BitWriter,
) -> Result<()> {
for value in values {
let raw = value.data();
writer.write_all(raw)?;
Expand All @@ -900,17 +968,27 @@ pub(crate) mod private {
}

#[inline]
fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize) {
fn set_data(
decoder: &mut PlainDecoderDetails,
data: ByteBufferPtr,
num_values: usize,
) {
decoder.data.replace(data);
decoder.start = 0;
decoder.num_values = num_values;
}

#[inline]
fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize> {
fn decode(
buffer: &mut [Self],
decoder: &mut PlainDecoderDetails,
) -> Result<usize> {
assert!(decoder.type_length > 0);

let data = decoder.data.as_mut().expect("set_data should have been called");
let data = decoder
.data
.as_mut()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
for i in 0..num_values {
let len = decoder.type_length as usize;
Expand All @@ -919,10 +997,7 @@ pub(crate) mod private {
return Err(eof_err!("Not enough bytes to decode"));
}

let val: &mut Self = buffer[i]
.as_mut_any()
.downcast_mut()
.unwrap();
let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();

val.set_data(data.range(decoder.start, len));
decoder.start += len;
Expand Down Expand Up @@ -984,7 +1059,7 @@ pub trait DataType: 'static {
// Workaround bug in specialization
pub trait SliceAsBytesDataType: DataType
where
Self::T: SliceAsBytes
Self::T: SliceAsBytes,
{
}

Expand Down
Loading

0 comments on commit aeda0f2

Please sign in to comment.