Commit
Merge branch 'develop'
cet001 committed Apr 6, 2019
2 parents 96de814 + 6a90ba4 commit b597300
Showing 8 changed files with 93 additions and 63 deletions.
10 changes: 9 additions & 1 deletion .travis.yml
@@ -1,7 +1,15 @@
language: go

go:
- 1.9.x
- "1.11.x"
- "1.12.x"

script:
- env GO111MODULE=on make

os:
- linux
- osx

notifications:
email:
1 change: 0 additions & 1 deletion Makefile
@@ -4,7 +4,6 @@ clean :
@echo ">>> Cleaning and initializing hastycsv project <<<"
@go clean
@gofmt -w .
@go get github.com/stretchr/testify

test : clean
@echo ">>> Running unit tests <<<"
5 changes: 2 additions & 3 deletions examples/example_1_read_from_stream.go
@@ -7,15 +7,14 @@ import (
)

func main() {
r := strings.NewReader(`make|model|year|mpg
Honda|Acura NSX|2017|18.1
r := strings.NewReader(`Honda|Acura NSX|2017|18.1
Chevrolet|Corvette|2016|16.5
BMW|M3|2015|18.7
Audi|A3|2014|25.4`)

// Create our CSV reader and configure it to use '|' as the field delimiter
hastyCsvReader := hastycsv.NewReader()
hastyCsvReader.Delimiter = '|'
hastyCsvReader.Comma = '|'

err := hastyCsvReader.Read(r, func(i int, fields []hastycsv.Field) {
fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", i,
8 changes: 6 additions & 2 deletions examples/example_2_read_from_file.go
@@ -8,8 +8,12 @@ import (
func main() {
const csvFile = "./examples/sample_data.csv"

err := hastycsv.ReadFile(csvFile, '|', func(i int, fields []hastycsv.Field) {
fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", i,
err := hastycsv.ReadFile(csvFile, '|', func(lineNum int, fields []hastycsv.Field) {
if lineNum == 1 {
return
} // skip header record

fmt.Printf("line %v: make=%v, model=%v, year=%v, mpg=%v\n", lineNum,
fields[0].String(),
fields[1].String(),
fields[2].Uint32(),
5 changes: 5 additions & 0 deletions go.mod
@@ -0,0 +1,5 @@
module github.com/cet001/hastycsv

go 1.12

require github.com/stretchr/testify v1.3.0
7 changes: 7 additions & 0 deletions go.sum
@@ -0,0 +1,7 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
42 changes: 24 additions & 18 deletions hastycsv.go
@@ -14,30 +14,37 @@ import (
"unsafe"
)

// Needed by Field.Uint32() parser
// Needed by Field.Uint32() parser.
var base10exp = []uint32{1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}

// Reads records from a CSV-encoded file or io.Reader.
type Reader struct {
// Delimiter is the CSV field delimiter. It is set to ',' by NewReader().
Delimiter byte
fields []Field
Row int
err error
// Comma is the field delimiter.
// It is set to comma (',') by NewReader.
// Comma cannot be \r or \n.
Comma byte

fields []Field
row int
err error
}

// Returns a new Reader whose Delimiter is set to the comma character (',').
func NewReader() *Reader {
return &Reader{
Delimiter: ',',
Comma: ',',
}
}

func (me *Reader) Read(r io.Reader, nextRecord func(i int, record []Field)) error {
if me.Comma == '\r' || me.Comma == '\n' {
return fmt.Errorf(`Comma delimiter cannot be \r or \n`)
}

var fields []Field
isFirstRecord := true
delim := me.Delimiter
me.Row = 0
delim := me.Comma
me.row = 0

lineScanner := bufio.NewScanner(r)
for lineScanner.Scan() {
@@ -53,24 +60,23 @@ func (me *Reader) Read(r io.Reader, nextRecord func(i int, record []Field)) erro
field.reader = me
}
isFirstRecord = false
continue
}

me.Row++
me.row++

if err := splitBytes(b, delim, fields); err != nil {
return fmt.Errorf("Line %v: %v: \"%v\"", me.Row, err, string(b))
return fmt.Errorf("Line %v: %v: \"%v\"", me.row, err, string(b))
}

nextRecord(me.Row, fields)
nextRecord(me.row, fields)

if me.err != nil {
return fmt.Errorf("Line %v: %v", me.Row, me.err)
return fmt.Errorf("Line %v: %v", me.row, me.err)
}
}

if me.err != nil {
return fmt.Errorf("Line %v: %v", me.Row, me.err)
return fmt.Errorf("Line %v: %v", me.row, me.err)
}

if err := lineScanner.Err(); err != nil {
return nil
}

func ReadFile(csvFilePath string, delim byte, nextRecord func(i int, record []Field)) error {
func ReadFile(csvFilePath string, comma byte, nextRecord func(i int, record []Field)) error {
f, err := os.Open(csvFilePath)
if err != nil {
return err
}
defer f.Close()

r := NewReader()
r.Delimiter = delim
return r.Read(f, nextRecord)
r.Comma = comma
return r.Read(bufio.NewReaderSize(f, 32*1024), nextRecord)
}

// Represents a field (encoded as a UTF-8 string) within a CSV record.
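
The hunks above rename the exported Delimiter field to Comma (the field name encoding/csv uses), unexport Row, and make Read reject '\r' and '\n' as delimiters. Below is a minimal usage sketch of the post-merge API; it is not part of the commit, and the input data and field names ("alice", "bob", name/age) are invented for illustration.

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/cet001/hastycsv"
)

func main() {
	// Hypothetical pipe-delimited input. There is no header row: after this
	// merge, Read hands every line to the callback, starting at i == 1.
	in := strings.NewReader("alice|30\nbob|42")

	r := hastycsv.NewReader()
	r.Comma = '|' // this field was named Delimiter before the merge

	err := r.Read(in, func(i int, record []hastycsv.Field) {
		fmt.Printf("line %v: name=%v age=%v\n", i, record[0].String(), record[1].Uint32())
	})
	if err != nil {
		// Read now also reports an invalid delimiter, e.g. "Comma delimiter cannot be \r or \n".
		log.Fatal(err)
	}
}

Note that the same []Field slice appears to be reused for every record (it is sized once from the first line), so a callback that needs to retain a field value beyond the current record should copy it.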
78 changes: 40 additions & 38 deletions hastycsv_test.go
@@ -29,21 +29,25 @@ func TestField_ToLower(t *testing.T) {
"!@#$%^&*()_+",
}

for _, value := range values {
assert.Equal(t, strings.ToLower(value), makeField(value).ToLower().String())
for i, value := range values {
assert.Equal(t,
strings.ToLower(value),
makeField(value).ToLower().String(),
"values[%v]", i,
)
}
}

func TestField_String(t *testing.T) {
testValues := []string{
values := []string{
"",
" ",
"a",
"abcdefg",
"ABC123",
}

for _, s := range testValues {
for _, s := range values {
field := makeField(s)
assert.Equal(t, s, field.String())
}
@@ -150,7 +154,7 @@ func TestSplitBytes(t *testing.T) {

// Special case: split bytes into a record that contains only 1 field. In this
// case, even if the input string contains the delimiter field, the entire string
// should get assinged to the record's single field.
// should get assigned to the record's single field.
func TestSplitBytes_recordWithOnlyOneField(t *testing.T) {
record := make([]Field, 1)
splitBytes([]byte("foo|bar"), '|', record)
@@ -175,14 +179,15 @@ func TestRead(t *testing.T) {
{name: "mary", age: 35, weight: 125.1},
}

data := "name|age|weight"
personRecords := []string{}
for _, p := range persons {
data += fmt.Sprintf("\n%v|%v|%v", p.name, p.age, p.weight)
personRecords = append(personRecords, fmt.Sprintf("%v|%v|%v", p.name, p.age, p.weight))
}
in := strings.NewReader(strings.Join(personRecords, "\n"))

r := NewReader()
r.Delimiter = '|'
err := r.Read(strings.NewReader(data), func(i int, fields []Field) {
r.Comma = '|'
err := r.Read(in, func(i int, fields []Field) {
expectedPerson := persons[i-1]
assert.Equal(t, expectedPerson.name, fields[0].String())
assert.Equal(t, expectedPerson.age, fields[1].Uint32())
assert.Nil(t, err)
}

func TestRead_InvalidComma(t *testing.T) {
r := NewReader()
in := strings.NewReader(`10|20|30`)

for _, invalidCommaChar := range []byte{'\r', '\n'} {
r.Comma = invalidCommaChar
err := r.Read(in, func(i int, record []Field) { /* no-op */ })
assert.EqualError(t, err, `Comma delimiter cannot be \r or \n`)
}
}

func TestRead_parsingError(t *testing.T) {
// Create CSV input stream in which 1st line contains an unparseable field
// (in this case, the 'age' field)
in := strings.NewReader(`name|age|weight
John|123xyz|12.5
// Create CSV input stream in which line 1 contains an unparseable Uint32 field.
in := strings.NewReader(`John|123xyz|12.5
Mary|25|130.5`)

r := NewReader()
r.Delimiter = '|'
r.Comma = '|'
err := r.Read(in, func(i int, fields []Field) {
fields[0].String()
fields[1].Uint32() // This call will halt csv reading and return an error in the 1st line
if err != nil {
assert.Fail(t, "Error creating temp file: %v", err)
}
defer os.Remove(tmpCsvFile.Name()) // delete the temp file when this function exits
fmt.Fprintln(tmpCsvFile, "firstName,lastName,age") // header row
fmt.Fprintln(tmpCsvFile, "mary,jones,35") // row 1
fmt.Fprintln(tmpCsvFile, "bill,anderson,40") // row 2
defer os.Remove(tmpCsvFile.Name()) // delete the temp file when this function exits

fmt.Fprintln(tmpCsvFile, "mary,jones,35") // row 1
fmt.Fprintln(tmpCsvFile, "bill,anderson,40") // row 2

err = ReadFile(tmpCsvFile.Name(), ',', func(i int, rec []Field) {
assert.Equal(t, 3, len(rec))
r := strings.NewReader(buf.String())

csvReader := NewReader()
csvReader.Delimiter = '|'
csvReader.Comma = '|'

b.ResetTimer()
for n := 0; n < b.N; n++ {
r := strings.NewReader(buf.String())

csvReader := NewReader()
csvReader.Delimiter = '|'
csvReader.Comma = '|'

b.ResetTimer()
for n := 0; n < b.N; n++ {

golangReader := csv.NewReader(r)
golangReader.Comma = '|'
golangReader.ReuseRecord = true

b.ResetTimer()
for n := 0; n < b.N; n++ {
r.Reset(buf.String())
count := 0
isHeaderRecord := true
for {
fields, err := golangReader.Read()
if err == io.EOF {
break
}
require.Nil(b, err)
if isHeaderRecord { // skip the header record
isHeaderRecord = false
continue
}
for _, field := range fields {
tmpString = field
}

golangReader := csv.NewReader(r)
golangReader.Comma = '|'
golangReader.ReuseRecord = true

b.ResetTimer()
for n := 0; n < b.N; n++ {
r.Reset(buf.String())
count := 0
isHeaderRecord := true
for {
fields, err := golangReader.Read()
if err == io.EOF {
break
}
require.Nil(b, err)
if isHeaderRecord { // skip the header record
isHeaderRecord = false
continue
}
for _, field := range fields {
v, err := strconv.Atoi(field)
require.Nil(b, err)

buf := bytes.NewBuffer(make([]byte, 0, recordCount))

// Write header record
for i := 0; i < fieldCount; i++ {
record[i] = fmt.Sprintf("field_%v", i)
}
buf.WriteString(strings.Join(record, "|"))

// Write the rest of the records
for i := 0; i < recordCount; i++ {
if i > 0 {
buf.WriteString("\n")
}

for j := 0; j < fieldCount; j++ {
record[j] = fmt.Sprintf("%v", baseValue+i)
}
buf.WriteString("\n")
buf.WriteString(strings.Join(record, "|"))
}

