diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index b68ac1b378fb..21924f78c5db 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -60,7 +60,7 @@ use crate::record_batch::RecordBatch; use csv_crate::{ByteRecord, StringRecord}; lazy_static! { - static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap(); + static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+)$").unwrap(); static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap(); static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$") .case_insensitive(true) @@ -271,7 +271,7 @@ pub fn infer_schema_from_files( has_header: bool, ) -> Result { let mut schemas = vec![]; - let mut records_to_read = max_read_records.unwrap_or(std::usize::MAX); + let mut records_to_read = max_read_records.unwrap_or(usize::MAX); for fname in files.iter() { let (schema, records_read) = infer_file_schema( @@ -1342,6 +1342,9 @@ mod tests { assert_eq!(infer_field_schema("\"123\""), DataType::Utf8); assert_eq!(infer_field_schema("10"), DataType::Int64); assert_eq!(infer_field_schema("10.2"), DataType::Float64); + assert_eq!(infer_field_schema(".2"), DataType::Float64); + // Should be parsed as Float or Int. See https://github.com/apache/arrow-rs/issues/929 + assert_eq!(infer_field_schema("2."), DataType::Utf8); assert_eq!(infer_field_schema("true"), DataType::Boolean); assert_eq!(infer_field_schema("false"), DataType::Boolean); assert_eq!(infer_field_schema("2020-11-08"), DataType::Date32);