From 5e69c6a432f0a9f50d4a95112e8d9861dd91243f Mon Sep 17 00:00:00 2001
From: Gabriel Aszalos
Date: Mon, 5 Nov 2018 15:07:02 +0100
Subject: [PATCH] obfuscate: add exception when parsing empty-string identifiers (#514)

---
 obfuscate/sql_test.go      | 32 ++++++++++++++++++++++++++------
 obfuscate/sql_tokenizer.go | 11 ++++++++++-
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/obfuscate/sql_test.go b/obfuscate/sql_test.go
index c895c284d..34afd4adf 100644
--- a/obfuscate/sql_test.go
+++ b/obfuscate/sql_test.go
@@ -86,8 +86,6 @@ func TestSQLResourceWithError(t *testing.T) {
 }
 
 func TestSQLQuantizer(t *testing.T) {
-	assert := assert.New(t)
-
 	cases := []sqlTestCase{
 		{
 			"select * from users where id = 42",
@@ -340,6 +338,26 @@ FROM [Blogs] AS [b]
 ORDER BY [b].[Name]`,
 			`SELECT [ b ] . [ BlogId ], [ b ] . [ Name ] FROM [ Blogs ] ORDER BY [ b ] . [ Name ]`,
 		},
+		{
+			`SELECT * FROM users WHERE firstname=''`,
+			`SELECT * FROM users WHERE firstname = ?`,
+		},
+		{
+			`SELECT * FROM users WHERE firstname=' '`,
+			`SELECT * FROM users WHERE firstname = ?`,
+		},
+		{
+			`SELECT * FROM users WHERE firstname=""`,
+			`SELECT * FROM users WHERE firstname = ""`,
+		},
+		{
+			`SELECT * FROM users WHERE lastname=" "`,
+			`SELECT * FROM users WHERE lastname = ""`,
+		},
+		{
+			`SELECT * FROM users WHERE lastname=" "`,
+			`SELECT * FROM users WHERE lastname = ""`,
+		},
 		{
 			`SELECT [b].[BlogId], [b].[Name]
 FROM [Blogs] AS [b
@@ -348,10 +366,12 @@ ORDER BY [b].[Name]`,
 		},
 	}
 
-	for _, c := range cases {
-		s := SQLSpan(c.query)
-		NewObfuscator(nil).Obfuscate(s)
-		assert.Equal(c.expected, s.Resource)
+	for i, c := range cases {
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+			s := SQLSpan(c.query)
+			NewObfuscator(nil).Obfuscate(s)
+			assert.Equal(t, c.expected, s.Resource)
+		})
 	}
 }
 
diff --git a/obfuscate/sql_tokenizer.go b/obfuscate/sql_tokenizer.go
index f1468595e..757d1f56b 100644
--- a/obfuscate/sql_tokenizer.go
+++ b/obfuscate/sql_tokenizer.go
@@ -3,6 +3,7 @@ package obfuscate
 import (
 	"bytes"
 	"strings"
+	"unicode"
 )
 
 // tokenizer.go implements a lexer-like iterator that tokenizes SQL and CQL
@@ -408,7 +409,15 @@ func (tkn *Tokenizer) scanString(delim uint16, typ int) (int, []byte) {
 		}
 		buffer.WriteByte(byte(ch))
 	}
-	return typ, buffer.Bytes()
+	buf := buffer.Bytes()
+	if (typ == ID && len(buf) == 0) || bytes.IndexFunc(buf, func(r rune) bool { return !unicode.IsSpace(r) }) == -1 {
+		// This is an empty or white-space-only string or identifier.
+		// Keep the start and end delimiters so that we do not create
+		// an invalid query.
+		// See: https://github.com/DataDog/datadog-trace-agent/issues/316
+		return typ, []byte{byte(delim), byte(delim)}
+	}
+	return typ, buf
 }
 
 func (tkn *Tokenizer) scanCommentType1(prefix string) (int, []byte) {
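
Reviewer note (not part of the patch): the new condition leans on the fact that
bytes.IndexFunc returns -1 when no rune in the buffer satisfies the predicate,
so an empty buffer and a white-space-only buffer are caught by the same
expression. Below is a minimal standalone sketch of that check; the
isEmptyOrSpace helper is illustrative only and does not exist in the
repository.

	package main

	import (
		"bytes"
		"fmt"
		"unicode"
	)

	// isEmptyOrSpace mirrors the condition added to scanString:
	// bytes.IndexFunc returns -1 when no rune satisfies the predicate,
	// i.e. when buf is empty or contains nothing but white space.
	func isEmptyOrSpace(buf []byte) bool {
		return bytes.IndexFunc(buf, func(r rune) bool { return !unicode.IsSpace(r) }) == -1
	}

	func main() {
		for _, s := range []string{"", " ", "\t ", "users", " users "} {
			fmt.Printf("%q -> %v\n", s, isEmptyOrSpace([]byte(s)))
		}
		// Output:
		// "" -> true
		// " " -> true
		// "\t " -> true
		// "users" -> false
		// " users " -> false
	}

Note that the len(buf) == 0 clause is technically subsumed by the IndexFunc
test, which already returns -1 for an empty buffer; the explicit clause only
spells out the identifier case the patch is about.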