Skip to content

Commit

Permalink
[SPARK-23094][SPARK-23723][SPARK-23724][SQL][FOLLOW-UP] Support custom encoding for json files
Browse files Browse the repository at this point in the history

## What changes were proposed in this pull request?
This adds a test case that checks the behavior when users write JSON in the UTF-16 or UTF-32 encoding with multiLine disabled.

## How was this patch tested?
N/A

Author: gatorsmile <[email protected]>

Closes #21254 from gatorsmile/followupSPARK-23094.
  • Loading branch information
gatorsmile authored and HyukjinKwon committed May 8, 2018
1 parent b54bbe5 commit 2f6fe7d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ private[sql] class JSONOptions(
val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
val isBlacklisted = blacklist.contains(Charset.forName(enc))
require(multiLine || !isBlacklisted,
s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
| ${blacklist.mkString(", ")}""".stripMargin)
s"""The $enc encoding in the blacklist is not allowed when multiLine is disabled.
|Blacklist: ${blacklist.mkString(", ")}""".stripMargin)

val isLineSepRequired =
multiLine || Charset.forName(enc) == StandardCharsets.UTF_8 || lineSeparator.nonEmpty

val isLineSepRequired = !(multiLine == false &&
Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")

enc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2313,6 +2313,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
}
}

test("SPARK-23723: write json in UTF-16/32 with multiline off") {
  // For each encoding under test, attempt a JSON write with multiline turned off and
  // expect an IllegalArgumentException whose message mentions the blacklist.
  for (encoding <- Seq("UTF-16", "UTF-32")) {
    withTempPath { outputDir =>
      val records = Seq(("a", 1), ("b", 2), ("c", 3))
      val dataset = spark.createDataset(records).repartition(2)

      // The whole writer chain stays inside `intercept` so that an exception raised at
      // any step of the chain is captured, exactly as a single chained expression would be.
      val thrown = intercept[IllegalArgumentException] {
        dataset.write
          .option("encoding", encoding)
          .option("multiline", "false")
          .format("json")
          .mode("overwrite")
          .save(outputDir.getCanonicalPath)
      }

      val expectedFragment =
        s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"
      assert(thrown.getMessage.contains(expectedFragment))
    }
  }
}

def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit = {
test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
val schema = new StructType().add("f1", StringType).add("f2", IntegerType)
Expand Down

0 comments on commit 2f6fe7d

Please sign in to comment.