From 9616984b81840d5954baa5fc1dc53a097fdb7f97 Mon Sep 17 00:00:00 2001
From: cet001
Date: Thu, 20 Jun 2019 00:26:29 -0700
Subject: [PATCH] - Added `Field.Bytes()` method, which returns the field's
 backing []byte slice.
 - Exported `ParseUint32([]byte) (uint32, error)` function.
 - Fixed bug in `Field.Uint32()` whereby fields containing a number that
 overflows uint32 by more than a factor of 10 would result in a panic; such
 fields now produce a parse error regardless of how many digits they have.

---
 hastycsv.go      | 66 +++++++++++++++++++++++++++++++++++-------------
 hastycsv_test.go | 13 ++++++++--
 2 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/hastycsv.go b/hastycsv.go
index bd0e325..2b96a48 100644
--- a/hastycsv.go
+++ b/hastycsv.go
@@ -9,13 +9,16 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"math"
 	"os"
 	"strconv"
 	"unsafe"
 )
 
-// Needed by Field.Uint32() parser.
+// base10exp[i] holds 10^i for every decimal place a uint32 can occupy
+// (10^0 through 10^9). ParseUint32() uses it to weigh each digit by its
+// position in the input.
 var base10exp = []uint64{1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}
 
 // Definition of a callback function that serves as a sequential record iterator.
 // Read() and ReadFile() will stop reading the input records if this function
@@ -116,6 +119,12 @@ func (me Field) IsEmpty() bool {
 	return len(me.data) == 0
 }
 
+// Returns the backing byte slice of this field.
+// The slice is not copied, so it aliases the field's underlying data.
+func (me Field) Bytes() []byte {
+	return me.data
+}
+
 // Returns this field as a string.
 func (me Field) String() string {
 	return string(me.data)
@@ -135,27 +144,14 @@ func (me Field) ToLower() Field {
 
 // Parses this field as a Uint32.
 func (me Field) Uint32() uint32 {
-	v := uint64(0)
-	d := len(me.data)
-	for _, b := range me.data {
-		if b < '0' || b > '9' {
-			if me.reader.err == nil {
-				me.reader.err = fmt.Errorf("Field \"%v\" contains non-numeric character '%v'", string(me.data), string(b))
-			}
-			return 0
-		}
-		d--
-		v += uint64(b-'0') * base10exp[d]
-	}
-
-	if v > 4294967295 {
+	i, err := ParseUint32(me.data)
+	if err != nil {
 		if me.reader.err == nil {
-			me.reader.err = fmt.Errorf("%v overflows uint32", string(me.data))
+			me.reader.err = fmt.Errorf(`Can't parse field as uint32: %v`, err)
 		}
-		return 0
 	}
 
-	return uint32(v)
+	return i
 }
 
 // Parses this field as a float32.
@@ -170,6 +166,40 @@ func (me Field) Float32() float32 {
 	return float32(f)
 }
 
+// Parses data as a base-10 uint32.
+//
+// Only the ASCII digits '0'-'9' are accepted; any other byte yields an error.
+// Leading zeros are allowed and an empty slice parses as 0. An error is also
+// returned when the value does not fit in a uint32, however many digits the
+// input has.
+func ParseUint32(data []byte) (uint32, error) {
+	v := uint64(0)
+	overflow := false
+	d := len(data)
+	for _, ch := range data {
+		if ch < '0' || ch > '9' {
+			return 0, fmt.Errorf(`"%v" contains non-numeric character '%v'`, string(data), string(ch))
+		}
+		d--
+		if ch == '0' {
+			continue
+		}
+		if d >= len(base10exp) {
+			// A non-zero digit beyond the table's last place is worth at least
+			// 10^10, which can never fit in a uint32.
+			overflow = true
+			continue
+		}
+		v += uint64(ch-'0') * base10exp[d]
+	}
+
+	if overflow || v > math.MaxUint32 {
+		return 0, fmt.Errorf(`"%v" overflows uint32`, string(data))
+	}
+
+	return uint32(v), nil
+}
+
 // Returns the string representation of this Field without creating a memory allocation.
 //
 // WARNING! The returned string points to this Field object, which is a mutable
diff --git a/hastycsv_test.go b/hastycsv_test.go
index 39c0fed..d4ad07d 100644
--- a/hastycsv_test.go
+++ b/hastycsv_test.go
@@ -45,6 +45,12 @@ func TestField_ToLower(t *testing.T) {
 	}
 }
 
+func TestField_Bytes(t *testing.T) {
+	assert.Equal(t, []byte{}, makeField("").Bytes())
+	assert.Equal(t, []byte{65, 66, 67}, makeField("ABC").Bytes())
+
+}
+
 func TestField_String(t *testing.T) {
 	values := []string{
 		"",
@@ -88,7 +94,10 @@ func TestField_Uint32_parseError(t *testing.T) {
 		"x",
 		"abc",
 		" ",
-		"4294967296", //uint32 overflow
+		"4294967296",                     //uint32 overflow (by 1)
+		"999999999999999",                // uint32 overflow (by a lot)
+		"18446744073709551616",           // 2^64: must not wrap around to a small value
+		"100000000000000000000000000000", // more digits than base10exp has entries
 	}
 
 	for _, badlyFormattedInt := range badlyFormattedInts {
@@ -256,7 +265,7 @@ Mary|25|130.5`)
 		return nil
 	})
 
-	assert.EqualError(t, err, "Line 1: Field \"123xyz\" contains non-numeric character 'x'")
+	assert.EqualError(t, err, "Line 1: Can't parse field as uint32: \"123xyz\" contains non-numeric character 'x'")
 }
 
 func TestReadFile(t *testing.T) {