diff --git a/.travis.yml b/.travis.yml index ef2ab24..d0f2c0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,15 @@ language: go go: - - 1.9.x + - "1.11.x" + - "1.12.x" + +script: + - env GO111MODULE=on make + +os: + - linux + - osx notifications: email: diff --git a/Makefile b/Makefile index 61992d9..a1d26d8 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,6 @@ clean : @echo ">>> Cleaning and initializing hastycsv project <<<" @go clean @gofmt -w . - @go get github.com/stretchr/testify test : clean @echo ">>> Running unit tests <<<" diff --git a/examples/example_1_read_from_stream.go b/examples/example_1_read_from_stream.go index 18c5d2d..009b3c8 100644 --- a/examples/example_1_read_from_stream.go +++ b/examples/example_1_read_from_stream.go @@ -7,15 +7,14 @@ import ( ) func main() { - r := strings.NewReader(`make|model|year|mpg -Honda|Acura NSX|2017|18.1 + r := strings.NewReader(`Honda|Acura NSX|2017|18.1 Chevrolet|Corvette|2016|16.5 BMW|M3|2015|18.7 Audi|A3|2014|25.4`) // Create our CSV reader and configure it to use '|' as the field delimiter hastyCsvReader := hastycsv.NewReader() - hastyCsvReader.Delimiter = '|' + hastyCsvReader.Comma = '|' err := hastyCsvReader.Read(r, func(i int, fields []hastycsv.Field) { fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", i, diff --git a/examples/example_2_read_from_file.go b/examples/example_2_read_from_file.go index fdde838..587eb6d 100644 --- a/examples/example_2_read_from_file.go +++ b/examples/example_2_read_from_file.go @@ -8,8 +8,12 @@ import ( func main() { const csvFile = "./examples/sample_data.csv" - err := hastycsv.ReadFile(csvFile, '|', func(i int, fields []hastycsv.Field) { - fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", i, + err := hastycsv.ReadFile(csvFile, '|', func(lineNum int, fields []hastycsv.Field) { + if lineNum == 1 { + return + } // skip header record + + fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", lineNum, fields[0].String(), fields[1].String(), 
fields[2].Uint32(), diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4d9cda7 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/cet001/hastycsv + +go 1.12 + +require github.com/stretchr/testify v1.3.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..4347755 --- /dev/null +++ b/go.sum @@ -0,0 +1,7 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/hastycsv.go b/hastycsv.go index 0055ca3..584b857 100644 --- a/hastycsv.go +++ b/hastycsv.go @@ -14,30 +14,37 @@ import ( "unsafe" ) -// Needed by Field.Uint32() parser +// Needed by Field.Uint32() parser. var base10exp = []uint32{1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000} // Reads records from a CSV-encoded file or io.Reader. type Reader struct { - // Delimiter is the CSV field delimiter. It is set to ',' by NewReader(). - Delimiter byte - fields []Field - Row int - err error + // Comma is the field delimiter. + // It is set to comma (',') by NewReader. + // Comma cannot be \r or \n. + Comma byte + + fields []Field + row int + err error } -// Returns a new Reader whose Delimiter is set to the comma character (','). +// NewReader returns a new Reader whose Comma is set to the comma character (','). 
func NewReader() *Reader { return &Reader{ - Delimiter: ',', + Comma: ',', } } func (me *Reader) Read(r io.Reader, nextRecord func(i int, record []Field)) error { + if me.Comma == '\r' || me.Comma == '\n' { + return fmt.Errorf(`Comma delimiter cannot be \r or \n`) + } + var fields []Field isFirstRecord := true - delim := me.Delimiter - me.Row = 0 + delim := me.Comma + me.row = 0 lineScanner := bufio.NewScanner(r) for lineScanner.Scan() { @@ -53,24 +60,23 @@ func (me *Reader) Read(r io.Reader, nextRecord func(i int, record []Field)) erro field.reader = me } isFirstRecord = false - continue } - me.Row++ + me.row++ if err := splitBytes(b, delim, fields); err != nil { - return fmt.Errorf("Line %v: %v: \"%v\"", me.Row, err, string(b)) + return fmt.Errorf("Line %v: %v: \"%v\"", me.row, err, string(b)) } - nextRecord(me.Row, fields) + nextRecord(me.row, fields) if me.err != nil { - return fmt.Errorf("Line %v: %v", me.Row, me.err) + return fmt.Errorf("Line %v: %v", me.row, me.err) } } if me.err != nil { - return fmt.Errorf("Line %v: %v", me.Row, me.err) + return fmt.Errorf("Line %v: %v", me.row, me.err) } if err := lineScanner.Err(); err != nil { @@ -80,7 +86,7 @@ func (me *Reader) Read(r io.Reader, nextRecord func(i int, record []Field)) erro return nil } -func ReadFile(csvFilePath string, delim byte, nextRecord func(i int, record []Field)) error { +func ReadFile(csvFilePath string, comma byte, nextRecord func(i int, record []Field)) error { f, err := os.Open(csvFilePath) if err != nil { return err @@ -88,8 +94,8 @@ func ReadFile(csvFilePath string, delim byte, nextRecord func(i int, record []Fi defer f.Close() r := NewReader() - r.Delimiter = delim - return r.Read(f, nextRecord) + r.Comma = comma + return r.Read(bufio.NewReaderSize(f, 32*1024), nextRecord) } // Represents a field (encoded as a UTF-8 string) within a CSV record. 
diff --git a/hastycsv_test.go b/hastycsv_test.go index 34aa879..889737f 100644 --- a/hastycsv_test.go +++ b/hastycsv_test.go @@ -29,13 +29,17 @@ func TestField_ToLower(t *testing.T) { "!@#$%^&*()_+", } - for _, value := range values { - assert.Equal(t, strings.ToLower(value), makeField(value).ToLower().String()) + for i, value := range values { + assert.Equal(t, + strings.ToLower(value), + makeField(value).ToLower().String(), + "values[%v]", i, + ) } } func TestField_String(t *testing.T) { - testValues := []string{ + values := []string{ "", " ", "a", @@ -43,7 +47,7 @@ func TestField_String(t *testing.T) { "ABC123", } - for _, s := range testValues { + for _, s := range values { field := makeField(s) assert.Equal(t, s, field.String()) } @@ -150,7 +154,7 @@ func TestSplitBytes(t *testing.T) { // Special case: split bytes into a record that contains only 1 field. In this // case, even if the input string contains the delimiter field, the entire string -// should get assinged to the record's single field. +// should get assigned to the record's single field. 
func TestSplitBytes_recordWithOnlyOneField(t *testing.T) { record := make([]Field, 1) splitBytes([]byte("foo|bar"), '|', record) @@ -175,14 +179,15 @@ func TestRead(t *testing.T) { {name: "mary", age: 35, weight: 125.1}, } - data := "name|age|weight" + personRecords := []string{} for _, p := range persons { - data += fmt.Sprintf("\n%v|%v|%v", p.name, p.age, p.weight) + personRecords = append(personRecords, fmt.Sprintf("%v|%v|%v", p.name, p.age, p.weight)) } + in := strings.NewReader(strings.Join(personRecords, "\n")) r := NewReader() - r.Delimiter = '|' - err := r.Read(strings.NewReader(data), func(i int, fields []Field) { + r.Comma = '|' + err := r.Read(in, func(i int, fields []Field) { expectedPerson := persons[i-1] assert.Equal(t, expectedPerson.name, fields[0].String()) assert.Equal(t, expectedPerson.age, fields[1].Uint32()) @@ -192,15 +197,24 @@ func TestRead(t *testing.T) { assert.Nil(t, err) } +func TestRead_InvalidComma(t *testing.T) { + r := NewReader() + in := strings.NewReader(`10|20|30`) + + for _, invalidCommaChar := range []byte{'\r', '\n'} { + r.Comma = invalidCommaChar + err := r.Read(in, func(i int, record []Field) { /* no-op */ }) + assert.EqualError(t, err, `Comma delimiter cannot be \r or \n`) + } +} + func TestRead_parsingError(t *testing.T) { - // Create CSV input stream in which 1st line contains an unparseable field - // (in this case, the 'age' field) - in := strings.NewReader(`name|age|weight -John|123xyz|12.5 + // Create CSV input stream in which line 1 contains an unparseable Uint32 field. 
+ in := strings.NewReader(`John|123xyz|12.5 Mary|25|130.5`) r := NewReader() - r.Delimiter = '|' + r.Comma = '|' err := r.Read(in, func(i int, fields []Field) { fields[0].String() fields[1].Uint32() // This call will halt csv reading and return an error in the 1st line @@ -216,10 +230,10 @@ func TestReadFile(t *testing.T) { if err != nil { assert.Fail(t, "Error creating temp file: %v", err) } - defer os.Remove(tmpCsvFile.Name()) // delete the temp file when this functio n exits - fmt.Fprintln(tmpCsvFile, "firstName,lastName,age") // header row - fmt.Fprintln(tmpCsvFile, "mary,jones,35") // row 1 - fmt.Fprintln(tmpCsvFile, "bill,anderson,40") // row 2 + defer os.Remove(tmpCsvFile.Name()) // delete the temp file when this function exits + + fmt.Fprintln(tmpCsvFile, "mary,jones,35") // row 1 + fmt.Fprintln(tmpCsvFile, "bill,anderson,40") // row 2 err = ReadFile(tmpCsvFile.Name(), ',', func(i int, rec []Field) { assert.Equal(t, 3, len(rec)) @@ -253,7 +267,7 @@ func BenchmarkRead_stringValues(b *testing.B) { r := strings.NewReader(buf.String()) csvReader := NewReader() - csvReader.Delimiter = '|' + csvReader.Comma = '|' b.ResetTimer() for n := 0; n < b.N; n++ { @@ -274,7 +288,7 @@ func BenchmarkRead_intValues(b *testing.B) { r := strings.NewReader(buf.String()) csvReader := NewReader() - csvReader.Delimiter = '|' + csvReader.Comma = '|' b.ResetTimer() for n := 0; n < b.N; n++ { @@ -296,22 +310,18 @@ func BenchmarkGoCsv_Read_stringValues(b *testing.B) { golangReader := csv.NewReader(r) golangReader.Comma = '|' + golangReader.ReuseRecord = true b.ResetTimer() for n := 0; n < b.N; n++ { r.Reset(buf.String()) count := 0 - isHeaderRecord := true for { fields, err := golangReader.Read() if err == io.EOF { break } require.Nil(b, err) - if isHeaderRecord { // skip the header record - isHeaderRecord = false - continue - } for _, field := range fields { tmpString = field } @@ -326,22 +336,18 @@ func BenchmarkGoCsv_Read_intValues(b *testing.B) { golangReader := csv.NewReader(r) 
golangReader.Comma = '|' + golangReader.ReuseRecord = true b.ResetTimer() for n := 0; n < b.N; n++ { r.Reset(buf.String()) count := 0 - isHeaderRecord := true for { fields, err := golangReader.Read() if err == io.EOF { break } require.Nil(b, err) - if isHeaderRecord { // skip the header record - isHeaderRecord = false - continue - } for _, field := range fields { v, err := strconv.Atoi(field) require.Nil(b, err) @@ -360,18 +366,14 @@ func createCsvRecords() *bytes.Buffer { buf := bytes.NewBuffer(make([]byte, 0, recordCount)) - // Write header record - for i := 0; i < fieldCount; i++ { - record[i] = fmt.Sprintf("field_%v", i) - } - buf.WriteString(strings.Join(record, "|")) - - // Write the rest of the records for i := 0; i < recordCount; i++ { + if i > 0 { + buf.WriteString("\n") + } + for j := 0; j < fieldCount; j++ { record[j] = fmt.Sprintf("%v", baseValue+i) } - buf.WriteString("\n") buf.WriteString(strings.Join(record, "|")) }