Skip to content

Commit

Permalink
Added new CSV option to turn off auto-parsing #1947
Browse files Browse the repository at this point in the history
  • Loading branch information
mikefarah committed Feb 15, 2024
1 parent e81b600 commit c32a9ce
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 28 deletions.
34 changes: 34 additions & 0 deletions acceptance_tests/inputs-format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,40 @@ EOM
assertEquals "$expected" "$X"
}

# Checks that --csv-auto-parse=f leaves a CSV cell that looks like a YAML map
# as a plain string, once with the default command and once with `ea`.
testInputCSVNoAuto() {
  # Fixture: header row "thing1" and one data row whose cell reads "name: cat".
  printf '%s\n' 'thing1' 'name: cat' >test.csv

  # The cell must come back as a quoted string, not as a nested map.
  expected="- thing1: 'name: cat'"

  X=$(./yq --csv-auto-parse=f test.csv -oy)
  assertEquals "$expected" "$X"

  X=$(./yq ea --csv-auto-parse=f test.csv -oy)
  assertEquals "$expected" "$X"
}

# Checks that --tsv-auto-parse=f leaves a TSV cell that looks like a YAML map
# as a plain string, once with the default command and once with `ea`.
testInputTSVNoAuto() {
  # Fixture: header row "thing1" and one data row whose cell reads "name: cat".
  printf '%s\n' 'thing1' 'name: cat' >test.tsv

  # The cell must come back as a quoted string, not as a nested map.
  expected="- thing1: 'name: cat'"

  X=$(./yq --tsv-auto-parse=f test.tsv -oy)
  assertEquals "$expected" "$X"

  X=$(./yq ea --tsv-auto-parse=f test.tsv -oy)
  assertEquals "$expected" "$X"
}

testInputCSVUTF8() {
read -r -d '' expected << EOM
- id: 1
Expand Down
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ yq -P -oy sample.json
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredXMLPreferences.SkipProcInst, "xml-skip-proc-inst", yqlib.ConfiguredXMLPreferences.SkipProcInst, "skip over process instructions (e.g. <?xml version=\"1\"?>)")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredXMLPreferences.SkipDirectives, "xml-skip-directives", yqlib.ConfiguredXMLPreferences.SkipDirectives, "skip over directives (e.g. <!DOCTYPE thing cat>)")

rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredCsvPreferences.AutoParse, "csv-auto-parse", yqlib.ConfiguredCsvPreferences.AutoParse, "parse CSV YAML/JSON values")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredTsvPreferences.AutoParse, "tsv-auto-parse", yqlib.ConfiguredTsvPreferences.AutoParse, "parse TSV YAML/JSON values")

rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocPrefix, "lua-prefix", yqlib.ConfiguredLuaPreferences.DocPrefix, "prefix")
rootCmd.PersistentFlags().StringVar(&yqlib.ConfiguredLuaPreferences.DocSuffix, "lua-suffix", yqlib.ConfiguredLuaPreferences.DocSuffix, "suffix")
rootCmd.PersistentFlags().BoolVar(&yqlib.ConfiguredLuaPreferences.UnquotedKeys, "lua-unquoted", yqlib.ConfiguredLuaPreferences.UnquotedKeys, "output unquoted string keys (e.g. {foo=\"bar\"})")
Expand Down
4 changes: 2 additions & 2 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ func createDecoder(format yqlib.InputFormat, evaluateTogether bool) (yqlib.Decod
case yqlib.JsonInputFormat:
return yqlib.NewJSONDecoder(), nil
case yqlib.CSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder(','), nil
return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredCsvPreferences), nil
case yqlib.TSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder('\t'), nil
return yqlib.NewCSVObjectDecoder(yqlib.ConfiguredTsvPreferences), nil
case yqlib.TomlInputFormat:
return yqlib.NewTomlDecoder(), nil
case yqlib.YamlInputFormat:
Expand Down
23 changes: 23 additions & 0 deletions pkg/yqlib/csv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package yqlib

// CsvPreferences holds the options used when decoding delimiter-separated
// input (CSV or TSV).
type CsvPreferences struct {
	// Separator is the field delimiter handed to the CSV reader.
	Separator rune
	// AutoParse controls whether cell values that parse as non-scalar
	// YAML/JSON are kept as parsed nodes (true) or left as plain strings (false).
	AutoParse bool
}

// NewDefaultCsvPreferences returns the default CSV preferences: a comma
// separator with auto-parsing enabled.
func NewDefaultCsvPreferences() CsvPreferences {
	var prefs CsvPreferences
	prefs.Separator = ','
	prefs.AutoParse = true
	return prefs
}

// NewDefaultTsvPreferences returns the default TSV preferences: a tab
// separator with auto-parsing enabled.
func NewDefaultTsvPreferences() CsvPreferences {
	prefs := NewDefaultCsvPreferences()
	prefs.Separator = '\t'
	return prefs
}

// Package-level preferences, initialised to the defaults above.
var (
	ConfiguredCsvPreferences = NewDefaultCsvPreferences()
	ConfiguredTsvPreferences = NewDefaultTsvPreferences()
)
92 changes: 79 additions & 13 deletions pkg/yqlib/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ const csvSimple = `name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`

// csvSimpleWithObject is CSV input with an extra "facts" column whose cells
// are formatted like YAML maps (e.g. "cool: true"). It is the input for both
// the auto-parse and the no-auto-parse decode scenarios.
const csvSimpleWithObject = `name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed
`
const csvMissing = `name,numberOfCats,likesApples,height
,null,,168.8
`
Expand Down Expand Up @@ -39,6 +44,31 @@ const expectedYamlFromCSV = `- name: Gary
likesApples: false
height: -188.8
`
// expectedYamlFromCSVWithObject is the expected YAML for the "decode-csv"
// scenario run on csvSimpleWithObject: each facts cell is emitted as a
// mapping under the facts key rather than as a string.
const expectedYamlFromCSVWithObject = `- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts:
cool: true
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts:
tall: indeed
`

// expectedYamlFromCSVNoParsing is the expected YAML for the
// "decode-csv-no-auto" scenario run on csvSimpleWithObject: facts cells stay
// quoted strings, while the numeric and boolean columns are still emitted as
// numbers and booleans.
const expectedYamlFromCSVNoParsing = `- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts: 'cool: true'
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts: 'tall: indeed'
`

const expectedYamlFromCSVMissingData = `- name: Gary
numberOfCats: 1
Expand Down Expand Up @@ -125,22 +155,29 @@ var csvScenarios = []formatScenario{
input: csvSimple,
expression: ".[0].name | key",
expected: "name\n",
scenarioType: "decode-csv-object",
scenarioType: "decode-csv",
},
{
description: "decode csv parent",
skipDoc: true,
input: csvSimple,
expression: ".[0].name | parent | .height",
expected: "168.8\n",
scenarioType: "decode-csv-object",
scenarioType: "decode-csv",
},
{
description: "Parse CSV into an array of objects",
subdescription: "First row is assumed to be the header row.",
input: csvSimple,
expected: expectedYamlFromCSV,
scenarioType: "decode-csv-object",
subdescription: "First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed!",
input: csvSimpleWithObject,
expected: expectedYamlFromCSVWithObject,
scenarioType: "decode-csv",
},
{
description: "Parse CSV into an array of objects, no auto-parsing",
subdescription: "First row is assumed to be the header row. Entries with YAML/JSON will be left as strings.",
input: csvSimpleWithObject,
expected: expectedYamlFromCSVNoParsing,
scenarioType: "decode-csv-no-auto",
},
{
description: "Scalar roundtrip",
Expand Down Expand Up @@ -172,12 +209,14 @@ func testCSVScenario(t *testing.T, s formatScenario) {
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder(',')), s.description)
case "encode-tsv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewYamlDecoder(ConfiguredYamlPreferences), NewCsvEncoder('\t')), s.description)
case "decode-csv-object":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-csv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-csv-no-auto":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: ',', AutoParse: false}), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "decode-tsv-object":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredTsvPreferences), NewYamlEncoder(2, false, ConfiguredYamlPreferences)), s.description)
case "roundtrip-csv":
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
test.AssertResultWithContext(t, s.expected, mustProcessFormatScenario(s, NewCSVObjectDecoder(ConfiguredCsvPreferences), NewCsvEncoder(',')), s.description)
default:
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
}
Expand All @@ -204,7 +243,32 @@ func documentCSVDecodeObjectScenario(w *bufio.Writer, s formatScenario, formatTy
}

writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
)
}

// documentCSVDecodeObjectNoAutoScenario writes the markdown documentation for
// a decode scenario that runs with auto-parsing turned off.
//
// It emits, in order: a "## <description>" heading, the optional
// subdescription, the sample input in a fenced block tagged with formatType,
// the yq command line to run, and the YAML produced by processing the
// scenario with a CSV object decoder (AutoParse false) and a YAML encoder.
//
// formatType is "csv" or "tsv"; it selects the file extension, the -p value,
// the name of the auto-parse flag, and the field separator (tab for "tsv",
// comma otherwise).
func documentCSVDecodeObjectNoAutoScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if s.subdescription != "" {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))

	writeOrPanic(w, "then\n")
	// The flag name follows the format (--csv-auto-parse / --tsv-auto-parse),
	// so the documented command matches the decoder used below.
	writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%[1]v --%[1]v-auto-parse=f sample.%[1]v\n```\n", formatType))
	writeOrPanic(w, "will output\n")

	separator := ','
	if formatType == "tsv" {
		separator = '\t'
	}

	writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
		mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: false}), NewYamlEncoder(s.indent, false, ConfiguredYamlPreferences))),
	)
}

Expand Down Expand Up @@ -268,7 +332,7 @@ func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType
}

writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
mustProcessFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))),
mustProcessFormatScenario(s, NewCSVObjectDecoder(CsvPreferences{Separator: separator, AutoParse: true}), NewCsvEncoder(separator))),
)
}

Expand All @@ -282,8 +346,10 @@ func documentCSVScenario(_ *testing.T, w *bufio.Writer, i interface{}) {
documentCSVEncodeScenario(w, s, "csv")
case "encode-tsv":
documentCSVEncodeScenario(w, s, "tsv")
case "decode-csv-object":
case "decode-csv":
documentCSVDecodeObjectScenario(w, s, "csv")
case "decode-csv-no-auto":
documentCSVDecodeObjectNoAutoScenario(w, s, "csv")
case "decode-tsv-object":
documentCSVDecodeObjectScenario(w, s, "tsv")
case "roundtrip-csv":
Expand Down
16 changes: 9 additions & 7 deletions pkg/yqlib/decoder_csv_object.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,29 @@ import (
)

type csvObjectDecoder struct {
separator rune
reader csv.Reader
finished bool
prefs CsvPreferences
reader csv.Reader
finished bool
}

func NewCSVObjectDecoder(separator rune) Decoder {
return &csvObjectDecoder{separator: separator}
func NewCSVObjectDecoder(prefs CsvPreferences) Decoder {
return &csvObjectDecoder{prefs: prefs}
}

func (dec *csvObjectDecoder) Init(reader io.Reader) error {
cleanReader, enc := utfbom.Skip(reader)
log.Debugf("Detected encoding: %s\n", enc)
dec.reader = *csv.NewReader(cleanReader)
dec.reader.Comma = dec.separator
dec.reader.Comma = dec.prefs.Separator
dec.finished = false
return nil
}

func (dec *csvObjectDecoder) convertToNode(content string) *CandidateNode {
node, err := parseSnippet(content)
if err != nil {
// if we're not auto-parsing, then we won't put in parsed objects or arrays
// but we still parse scalars
if err != nil || (!dec.prefs.AutoParse && node.Kind != ScalarNode) {
return createScalarNode(content, content)
}
return node
Expand Down
40 changes: 36 additions & 4 deletions pkg/yqlib/doc/usage/csv-tsv.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,13 @@ Samantha's Rabbit,,-188.8
```

## Parse CSV into an array of objects
First row is assumed to be the header row.
First row is assumed to be the header row. By default, entries with YAML/JSON formatting will be parsed!

Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed
```
then
Expand All @@ -155,10 +155,42 @@ will output
numberOfCats: 1
likesApples: true
height: 168.8
facts:
cool: true
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts:
tall: indeed
```
## Parse CSV into an array of objects, no auto-parsing
First row is assumed to be the header row. Entries containing YAML/JSON objects or arrays will be left as strings; scalar values such as numbers and booleans are still parsed.

Given a sample.csv file of:
```csv
name,numberOfCats,likesApples,height,facts
Gary,1,true,168.8,cool: true
Samantha's Rabbit,2,false,-188.8,tall: indeed

```
then
```bash
yq -p=csv --csv-auto-parse=f sample.csv
```
will output
```yaml
- name: Gary
numberOfCats: 1
likesApples: true
height: 168.8
facts: 'cool: true'
- name: Samantha's Rabbit
numberOfCats: 2
likesApples: false
height: -188.8
facts: 'tall: indeed'
```
## Parse TSV into an array of objects
Expand Down
4 changes: 2 additions & 2 deletions pkg/yqlib/operator_encoder_decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ func createDecoder(format InputFormat) Decoder {
case PropertiesInputFormat:
decoder = NewPropertiesDecoder()
case CSVObjectInputFormat:
decoder = NewCSVObjectDecoder(',')
decoder = NewCSVObjectDecoder(ConfiguredCsvPreferences)
case TSVObjectInputFormat:
decoder = NewCSVObjectDecoder('\t')
decoder = NewCSVObjectDecoder(ConfiguredTsvPreferences)
case UriInputFormat:
decoder = NewUriDecoder()
}
Expand Down

0 comments on commit c32a9ce

Please sign in to comment.