From 1de272f98d0ff22d0dd151797f22b8faf310963a Mon Sep 17 00:00:00 2001 From: yangjie Date: Fri, 4 Sep 2020 09:40:35 +0900 Subject: [PATCH] [SPARK-32762][SQL][TEST] Enhance the verification of ExpressionsSchemaSuite to sql-expression-schema.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? `sql-expression-schema.md` is automatically generated by `ExpressionsSchemaSuite`, but only the expression entries are checked in `ExpressionsSchemaSuite`. So if we manually modify the contents of the file, `ExpressionsSchemaSuite` does not necessarily guarantee its correctness. For example, [Spark-24884](https://github.com/apache/spark/pull/27507) added `regexp_extract_all` expression support and manually modified `sql-expression-schema.md`, but did not update the value of `Number of queries`, which left the file content inconsistent. Some additional checks have been added to `ExpressionsSchemaSuite` to improve the correctness guarantee of `sql-expression-schema.md`, as follows: - `Number of queries` should equal the size of `expressions entries` in `sql-expression-schema.md` - `Number of expressions that missing example` should equal the size of `Expressions missing examples` in `sql-expression-schema.md` - `missingExamples` from the test case should be the same as `expectedMissingExamples` from `sql-expression-schema.md` ### Why are the changes needed? Ensure the correctness of `sql-expression-schema.md` content. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Enhanced ExpressionsSchemaSuite Closes #29608 from LuciferYang/sql-expression-schema. 
Authored-by: yangjie Signed-off-by: Takeshi Yamamuro --- .../sql-functions/sql-expression-schema.md | 2 +- .../spark/sql/ExpressionsSchemaSuite.scala | 29 +++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index a212d8ce40642..53270b84e7c86 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 338 + - Number of queries: 339 - Number of expressions that missing example: 34 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index de1517e001b47..fd964355e8194 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -152,16 +152,17 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val outputSize = outputs.size val headerSize = header.size - val expectedOutputs: Seq[QueryOutput] = { + val (expectedMissingExamples, expectedOutputs) = { val expectedGoldenOutput = fileToString(resultFile) val lines = expectedGoldenOutput.split("\n") val expectedSize = lines.size assert(expectedSize == outputSize + headerSize, s"Expected $expectedSize blocks in result file but got " + - s"${outputSize + headerSize}. 
Try regenerate the result files.") + s"${outputSize + headerSize}. Try regenerating the result files.") - Seq.tabulate(outputSize) { i => + val numberOfQueries = lines(2).split(":")(1).trim.toInt + val expectedOutputs = Seq.tabulate(outputSize) { i => val segments = lines(i + headerSize).split('|') QueryOutput( className = segments(1).trim, @@ -169,6 +170,20 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { sql = segments(3).trim, schema = segments(4).trim) } + + assert(numberOfQueries == expectedOutputs.size, + s"expected outputs size: ${expectedOutputs.size} not same as numberOfQueries: " + + s"$numberOfQueries record in result file. Try regenerating the result files.") + + val numberOfMissingExamples = lines(3).split(":")(1).trim.toInt + val expectedMissingExamples = lines(4).split(":")(1).trim.split(",") + + assert(numberOfMissingExamples == expectedMissingExamples.size, + s"expected missing examples size: ${expectedMissingExamples.size} not same as " + + s"numberOfMissingExamples: $numberOfMissingExamples " + + "record in result file. Try regenerating the result files.") + + (expectedMissingExamples, expectedOutputs) } // Compare results. @@ -179,5 +194,13 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { assert(expected.sql == output.sql, "SQL query did not match") assert(expected.schema == output.schema, s"Schema did not match for query ${expected.sql}") } + + // Compare expressions missing examples + assert(expectedMissingExamples.length == missingExamples.size, + "The number of missing examples not equals the number of expected missing examples.") + + missingExamples.zip(expectedMissingExamples).foreach { case (output, expected) => + assert(expected == output, "Missing example expression not match") + } } }