From 40cffbdd8e5079e86ba3a3b3838e305cf2a3a0fe Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 13 Apr 2018 14:37:56 +0200 Subject: [PATCH 1/8] [SPARK-23901][SQL] Add masking functions --- .../expressions/MaskExpressionsUtils.java | 80 +++ .../catalyst/analysis/FunctionRegistry.scala | 8 + .../expressions/maskExpressions.scala | 533 ++++++++++++++++++ .../expressions/MaskExpressionsSuite.scala | 226 ++++++++ .../org/apache/spark/sql/functions.scala | 119 ++++ .../spark/sql/DataFrameFunctionsSuite.scala | 97 ++++ 6 files changed, 1063 insertions(+) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java new file mode 100644 index 0000000000000..b88b12df4531a --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions;
+
+/**
+ * Contains all the Utils methods used in the masking expressions.
+ */
+public class MaskExpressionsUtils {
+  static final int UNMASKED_VAL = -1; // sentinel: leave this category of characters unmasked
+
+  /**
+   * Returns the masking code point for {@code c}, chosen by its Unicode general category.
+   * @param c the character to transform
+   * @param maskedUpperChar the character to use instead of an uppercase letter
+   * @param maskedLowerChar the character to use instead of a lowercase letter
+   * @param maskedDigitChar the character to use instead of a digit
+   * @param maskedOtherChar the character to use instead of any other character
+   * @return masking character for {@code c}, or {@code c} itself when its category is unmasked
+   */
+  public static int transformChar(
+      final int c,
+      int maskedUpperChar,
+      int maskedLowerChar,
+      int maskedDigitChar,
+      int maskedOtherChar) {
+    switch (Character.getType(c)) {
+      case Character.UPPERCASE_LETTER:
+        if (maskedUpperChar != UNMASKED_VAL) {
+          return maskedUpperChar;
+        }
+        break;
+
+      case Character.LOWERCASE_LETTER:
+        if (maskedLowerChar != UNMASKED_VAL) {
+          return maskedLowerChar;
+        }
+        break;
+
+      case Character.DECIMAL_DIGIT_NUMBER:
+        if (maskedDigitChar != UNMASKED_VAL) {
+          return maskedDigitChar;
+        }
+        break;
+
+      default:
+        if (maskedOtherChar != UNMASKED_VAL) {
+          return maskedOtherChar;
+        }
+        break;
+    }
+
+    return c;
+  }
+
+  /**
+   * Returns the first code point of {@code rep} when the user specified a non-empty replacement,
+   * otherwise the {@code def} default. A surrogate pair is read as one code point, not split.
+   */
+  public static int getReplacementChar(String rep, int def) {
+    if (rep != null && !rep.isEmpty()) {
+      return rep.codePointAt(0);
+    }
+    return def;
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 131b958239e41..58bd5017b709e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -411,6 +411,14 @@ object FunctionRegistry {
     expression[SortArray]("sort_array"),
     CreateStruct.registryEntry,
 
+    // mask functions
+    expression[Mask]("mask"),
+    expression[MaskFirstN]("mask_first_n"),
+    expression[MaskLastN]("mask_last_n"),
+    expression[MaskShowFirstN]("mask_show_first_n"),
+    expression[MaskShowLastN]("mask_show_last_n"),
+    expression[MaskHash]("mask_hash"),
+
     // misc functions
     expression[AssertTrue]("assert_true"),
     expression[Crc32]("crc32"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala
new file mode 100644
index 0000000000000..fd0d327bc97d1
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.commons.codec.digest.DigestUtils
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenerator, CodegenContext, ExprCode}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+/** Replacement characters and Java codegen snippets shared by the mask expressions. */
+trait MaskLike {
+  val defaultMaskedUppercase: Int = 'X'
+  val defaultMaskedLowercase: Int = 'x'
+  val defaultMaskedDigit: Int = 'n'
+  val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL
+
+  def upper: String
+  def lower: String
+  def digit: String
+
+  protected lazy val upperReplacement: Int = getReplacementChar(upper, defaultMaskedUppercase)
+  protected lazy val lowerReplacement: Int = getReplacementChar(lower, defaultMaskedLowercase)
+  protected lazy val digitReplacement: Int = getReplacementChar(digit, defaultMaskedDigit)
+
+  protected val maskUtilsClassName: String = classOf[MaskExpressionsUtils].getName
+
+  def maskAndAppendToStringBuilderCode(
+      ctx: CodegenContext,
+      sb: String,
+      inputString: String,
+      start: String,
+      end: String): String = {
+    val i = ctx.freshName("i")
+    s"""
+       |for (${CodeGenerator.JAVA_INT} $i = $start; $i < $end; $i ++) {
+       |  $sb.appendCodePoint($maskUtilsClassName.transformChar($inputString.charAt($i),
+       |    $upperReplacement, $lowerReplacement,
+       |    $digitReplacement, $defaultMaskedOther));
+       |}
+     """.stripMargin
+  }
+
+  def appendUnchangedToStringBuilderCode(
+      ctx: CodegenContext,
+      sb: String,
+      inputString: String,
+      start: String,
+      end: String): String = {
+    val i = ctx.freshName("i")
+    s"""
+       |for (${CodeGenerator.JAVA_INT} $i = $start; $i < $end; $i ++) {
+       |  $sb.appendCodePoint($inputString.charAt($i));
+       |}
+     """.stripMargin
+  }
+}
+
+trait MaskLikeWithN extends MaskLike {
+  def n: Int
+  protected lazy val charCount: Int = if (n < 0) 0 else n // a negative n counts as 0
+}
+
+/**
+ * Helpers turning the literal constructor arguments of the mask expressions into plain values.
+ */
+object MaskLike {
+  val defaultCharCount = 4
+
+  def extractCharCount(e: Expression): Int = e match {
+    case Literal(i, IntegerType|NullType) =>
+      if (i == null) defaultCharCount else i.asInstanceOf[Int]
+    case Literal(_, dt) => throw new AnalysisException(s"Expected literal expression of type " +
+      s"${IntegerType.simpleString}, but got literal of ${dt.simpleString}")
+    case _ => defaultCharCount // non-literal expressions fall back to the default count
+  }
+
+  def extractReplacement(e: Expression): String = e match {
+    case Literal(s, StringType|NullType) => if (s == null) null else s.toString
+    case Literal(_, dt) => throw new AnalysisException(s"Expected literal expression of type " +
+      s"${StringType.simpleString}, but got literal of ${dt.simpleString}")
+    case _ => null // non-literal expressions yield null, i.e. the default mask character
+  }
+}
+
+/**
+ * Masks the input string. Additional parameters can be set to change the masking chars for
+ * uppercase letters, lowercase letters and digits.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, upper[, lower[, digit]]]) - Masks str. By default, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the second argument controls the mask character for upper case letters, the third argument for lower case letters and the fourth argument for numbers.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("abcd-EFGH-8765-4321", "U", "l", "#");
+       llll-UUUU-####-####
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class Mask(child: Expression, upper: String, lower: String, digit: String)
+  extends UnaryExpression with ExpectsInputTypes with MaskLike {
+
+  def this(child: Expression) = this(child, null.asInstanceOf[String], null, null)
+
+  def this(child: Expression, upper: Expression) =
+    this(child, MaskLike.extractReplacement(upper), null, null)
+
+  def this(child: Expression, upper: Expression, lower: Expression) =
+    this(child, MaskLike.extractReplacement(upper), MaskLike.extractReplacement(lower), null)
+
+  def this(child: Expression, upper: Expression, lower: Expression, digit: Expression) =
+    this(child,
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      MaskLike.extractReplacement(digit))
+
+  override def nullSafeEval(input: Any): Any = {
+    val res = input.asInstanceOf[UTF8String].toString.map(transformChar(
+      _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar)
+    UTF8String.fromString(res)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val sb = ctx.freshName("sb")
+      val inputString = ctx.freshName("inputString")
+      s"""
+         |String $inputString = $input.toString();
+         |StringBuilder $sb = new StringBuilder($inputString.length());
+         |${maskAndAppendToStringBuilderCode(ctx, sb, inputString, "0", s"$inputString.length()")}
+         |${ev.value} = UTF8String.fromString($sb.toString());
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+}
+
+/**
+ * Masks the first N chars of the input string. N defaults to 4. Additional parameters can be set
+ * to change the masking chars for uppercase letters, lowercase letters and digits.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks the first n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("1234-5678-8765-4321", 4);
+       nnnn-5678-8765-4321
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class MaskFirstN(
+    child: Expression,
+    n: Int,
+    upper: String,
+    lower: String,
+    digit: String)
+  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {
+
+  def this(child: Expression) =
+    this(child, MaskLike.defaultCharCount, null, null, null)
+
+  def this(child: Expression, n: Expression) =
+    this(child, MaskLike.extractCharCount(n), null, null, null)
+
+  def this(child: Expression, n: Expression, upper: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      null,
+      null)
+
+  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      null)
+
+  def this(
+      child: Expression,
+      n: Expression,
+      upper: Expression,
+      lower: Expression,
+      digit: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      MaskLike.extractReplacement(digit))
+
+  override def nullSafeEval(input: Any): Any = {
+    val inputString = input.asInstanceOf[UTF8String].toString
+    val (firstN, others) = inputString.splitAt(charCount)
+    val transformed = firstN.map(transformChar(
+      _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar)
+    UTF8String.fromString(transformed + others)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val sb = ctx.freshName("sb")
+      val inputString = ctx.freshName("inputString")
+      val endOfMask = ctx.freshName("endOfMask")
+      s"""
+         |String $inputString = $input.toString();
+         |${CodeGenerator.JAVA_INT} $endOfMask = $charCount > $inputString.length() ?
+         |  $inputString.length() : $charCount;
+         |StringBuilder $sb = new StringBuilder($inputString.length());
+         |${maskAndAppendToStringBuilderCode(ctx, sb, inputString, "0", endOfMask)}
+         |${appendUnchangedToStringBuilderCode(
+              ctx, sb, inputString, endOfMask, s"$inputString.length()")}
+         |${ev.value} = UTF8String.fromString($sb.toString());
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  override def prettyName: String = "mask_first_n"
+}
+
+/**
+ * Masks the last N chars of the input string. N defaults to 4. Additional parameters can be set
+ * to change the masking chars for uppercase letters, lowercase letters and digits.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks the last n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("1234-5678-8765-4321", 4);
+       1234-5678-8765-nnnn
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class MaskLastN(
+    child: Expression,
+    n: Int,
+    upper: String,
+    lower: String,
+    digit: String)
+  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {
+
+  def this(child: Expression) =
+    this(child, MaskLike.defaultCharCount, null, null, null)
+
+  def this(child: Expression, n: Expression) =
+    this(child, MaskLike.extractCharCount(n), null, null, null)
+
+  def this(child: Expression, n: Expression, upper: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      null,
+      null)
+
+  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      null)
+
+  def this(
+      child: Expression,
+      n: Expression,
+      upper: Expression,
+      lower: Expression,
+      digit: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      MaskLike.extractReplacement(digit))
+
+  override def nullSafeEval(input: Any): Any = {
+    val inputString = input.asInstanceOf[UTF8String].toString
+    val (others, lastN) = inputString.splitAt(inputString.length - charCount)
+    val transformed = lastN.map(transformChar(
+      _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar)
+    UTF8String.fromString(others + transformed)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val sb = ctx.freshName("sb")
+      val inputString = ctx.freshName("inputString")
+      val startOfMask = ctx.freshName("startOfMask")
+      s"""
+         |String $inputString = $input.toString();
+         |${CodeGenerator.JAVA_INT} $startOfMask = $charCount >= $inputString.length() ?
+         |  0 : $inputString.length() - $charCount;
+         |StringBuilder $sb = new StringBuilder($inputString.length());
+         |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, "0", startOfMask)}
+         |${maskAndAppendToStringBuilderCode(
+              ctx, sb, inputString, startOfMask, s"$inputString.length()")}
+         |${ev.value} = UTF8String.fromString($sb.toString());
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  override def prettyName: String = "mask_last_n"
+}
+
+/**
+ * Masks all but the first N chars of the input string. N defaults to 4. Additional parameters can
+ * be set to change the masking chars for uppercase letters, lowercase letters and digits.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks all but the first n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("1234-5678-8765-4321", 4);
+       1234-nnnn-nnnn-nnnn
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class MaskShowFirstN(
+    child: Expression,
+    n: Int,
+    upper: String,
+    lower: String,
+    digit: String)
+  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {
+
+  def this(child: Expression) =
+    this(child, MaskLike.defaultCharCount, null, null, null)
+
+  def this(child: Expression, n: Expression) =
+    this(child, MaskLike.extractCharCount(n), null, null, null)
+
+  def this(child: Expression, n: Expression, upper: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      null,
+      null)
+
+  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      null)
+
+  def this(
+      child: Expression,
+      n: Expression,
+      upper: Expression,
+      lower: Expression,
+      digit: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      MaskLike.extractReplacement(digit))
+
+  override def nullSafeEval(input: Any): Any = {
+    val inputString = input.asInstanceOf[UTF8String].toString
+    val (firstN, others) = inputString.splitAt(charCount)
+    val transformed = others.map(transformChar(
+      _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar)
+    UTF8String.fromString(firstN + transformed)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val sb = ctx.freshName("sb")
+      val inputString = ctx.freshName("inputString")
+      val startOfMask = ctx.freshName("startOfMask")
+      s"""
+         |String $inputString = $input.toString();
+         |${CodeGenerator.JAVA_INT} $startOfMask = $charCount > $inputString.length() ?
+         |  $inputString.length() : $charCount;
+         |StringBuilder $sb = new StringBuilder($inputString.length());
+         |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, "0", startOfMask)}
+         |${maskAndAppendToStringBuilderCode(
+              ctx, sb, inputString, startOfMask, s"$inputString.length()")}
+         |${ev.value} = UTF8String.fromString($sb.toString());
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  override def prettyName: String = "mask_show_first_n"
+}
+
+/**
+ * Masks all but the last N chars of the input string. N defaults to 4. Additional parameters can
+ * be set to change the masking chars for uppercase letters, lowercase letters and digits.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, n[, upper[, lower[, digit]]]]) - Masks all but the last n values of str. By default, n is 4, upper case letters are converted to \"X\", lower case letters are converted to \"x\" and numbers are converted to \"n\". You can override the characters used in the mask by supplying additional arguments: the third argument controls the mask character for upper case letters, the fourth argument for lower case letters and the fifth argument for numbers.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("1234-5678-8765-4321", 4);
+       nnnn-nnnn-nnnn-4321
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class MaskShowLastN(
+    child: Expression,
+    n: Int,
+    upper: String,
+    lower: String,
+    digit: String)
+  extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN {
+
+  def this(child: Expression) =
+    this(child, MaskLike.defaultCharCount, null, null, null)
+
+  def this(child: Expression, n: Expression) =
+    this(child, MaskLike.extractCharCount(n), null, null, null)
+
+  def this(child: Expression, n: Expression, upper: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      null,
+      null)
+
+  def this(child: Expression, n: Expression, upper: Expression, lower: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      null)
+
+  def this(
+      child: Expression,
+      n: Expression,
+      upper: Expression,
+      lower: Expression,
+      digit: Expression) =
+    this(child,
+      MaskLike.extractCharCount(n),
+      MaskLike.extractReplacement(upper),
+      MaskLike.extractReplacement(lower),
+      MaskLike.extractReplacement(digit))
+
+  override def nullSafeEval(input: Any): Any = {
+    val inputString = input.asInstanceOf[UTF8String].toString
+    val (others, lastN) = inputString.splitAt(inputString.length - charCount)
+    val transformed = others.map(transformChar(
+      _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar)
+    UTF8String.fromString(transformed + lastN)
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val sb = ctx.freshName("sb")
+      val inputString = ctx.freshName("inputString")
+      val endOfMask = ctx.freshName("endOfMask")
+      s"""
+         |String $inputString = $input.toString();
+         |${CodeGenerator.JAVA_INT} $endOfMask = $charCount >= $inputString.length() ?
+         |  0 : $inputString.length() - $charCount;
+         |StringBuilder $sb = new StringBuilder($inputString.length());
+         |${maskAndAppendToStringBuilderCode(ctx, sb, inputString, "0", endOfMask)}
+         |${appendUnchangedToStringBuilderCode(
+              ctx, sb, inputString, endOfMask, s"$inputString.length()")}
+         |${ev.value} = UTF8String.fromString($sb.toString());
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  override def prettyName: String = "mask_show_last_n"
+}
+
+/**
+ * Returns the MD5 hex digest of str, so equal inputs always produce equal masked values.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str) - Returns a hashed value based on str. The hash is consistent and can be used to join masked values together across tables.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_("abcd-EFGH-8765-4321");
+       60c713f5ec6912229d2060df1c322776
+  """, since = "2.4.0")
+// scalastyle:on line.size.limit
+case class MaskHash(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def nullSafeEval(input: Any): Any = {
+    UTF8String.fromString(DigestUtils.md5Hex(input.asInstanceOf[UTF8String].toString))
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (input: String) => {
+      val digestUtilsClass = classOf[DigestUtils].getName.stripSuffix("$")
+      s"""
+         |${ev.value} = UTF8String.fromString($digestUtilsClass.md5Hex($input.toString()));
+         |""".stripMargin
+    })
+  }
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StringType)
+
+  override def prettyName: String = "mask_hash"
+}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala new file mode 100644 index 0000000000000..088c89d8335df --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.types.{IntegerType, StringType}
+
+class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+  test("mask") {
+    checkEvaluation(Mask(Literal("abcd-EFGH-8765-4321"), "U", "l", "#"), "llll-UUUU-####-####")
+    checkEvaluation(
+      new Mask(Literal("abcd-EFGH-8765-4321"), Literal("U"), Literal("l"), Literal("#")),
+      "llll-UUUU-####-####")
+    checkEvaluation(new Mask(Literal("abcd-EFGH-8765-4321"), Literal("U"), Literal("l")),
+      "llll-UUUU-nnnn-nnnn")
+    checkEvaluation(new Mask(Literal("abcd-EFGH-8765-4321"), Literal("U")), "xxxx-UUUU-nnnn-nnnn")
+    checkEvaluation(new Mask(Literal("abcd-EFGH-8765-4321")), "xxxx-XXXX-nnnn-nnnn")
+    checkEvaluation(new Mask(Literal(null, StringType)), null) // null input gives null
+    checkEvaluation(Mask(Literal("abcd-EFGH-8765-4321"), null, "l", "#"), "llll-XXXX-####-####")
+    checkEvaluation(new Mask(
+      Literal("abcd-EFGH-8765-4321"),
+      Literal(null, StringType),
+      Literal(null, StringType),
+      Literal(null, StringType)), "xxxx-XXXX-nnnn-nnnn") // null replacements use the defaults
+    checkEvaluation(new Mask(Literal("abcd-EFGH-8765-4321"), Literal("Upper")),
+      "xxxx-UUUU-nnnn-nnnn") // only the first character of "Upper" is used
+    checkEvaluation(new Mask(Literal("")), "")
+    checkEvaluation(new Mask(Literal("abcd-EFGH-8765-4321"), Literal("")), "xxxx-XXXX-nnnn-nnnn")
+    checkEvaluation(Mask(Literal("abcd-EFGH-8765-4321"), "", "", ""), "xxxx-XXXX-nnnn-nnnn")
+    // scalastyle:off nonascii
+    checkEvaluation(Mask(Literal("Ul9U"), "\u2200", null, null), "\u2200xn\u2200")
+    // scalastyle:on nonascii
+    intercept[AnalysisException] { // a non-string literal replacement is rejected
+      checkEvaluation(new Mask(Literal(""), Literal(1)), "")
+    }
+  }
+
+  test("mask_first_n") {
+    checkEvaluation(MaskFirstN(Literal("abcd-EFGH-8765-4321"), 6, "U", "l", "#"),
+      "llll-UFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(
+      Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")),
+      "llll-UFGH-8765-4321")
+    checkEvaluation(
+      new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l")),
+      "llll-UFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U")),
+      "xxxx-UFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6)),
+      "xxxx-XFGH-8765-4321")
+    intercept[AnalysisException] { // a string literal is not accepted as the count n
+      checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal("U")), "")
+    }
+    checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321")), "xxxx-EFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(Literal(null, StringType)), null)
+    checkEvaluation(MaskFirstN(Literal("abcd-EFGH-8765-4321"), 4, "U", "l", null),
+      "llll-EFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(
+      Literal("abcd-EFGH-8765-4321"),
+      Literal(null, IntegerType),
+      Literal(null, StringType),
+      Literal(null, StringType),
+      Literal(null, StringType)), "xxxx-EFGH-8765-4321") // null n means the default of 4
+    checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("Upper")),
+      "xxxx-UFGH-8765-4321")
+    checkEvaluation(new MaskFirstN(Literal("")), "")
+    checkEvaluation(new MaskFirstN(Literal("abcd-EFGH-8765-4321"), Literal(4), Literal("")),
+      "xxxx-EFGH-8765-4321")
+    checkEvaluation(MaskFirstN(Literal("abcd-EFGH-8765-4321"), 1000, "", "", ""),
+      "xxxx-XXXX-nnnn-nnnn") // n beyond the input length masks the whole input
+    checkEvaluation(MaskFirstN(Literal("abcd-EFGH-8765-4321"), -1, "", "", ""),
+      "abcd-EFGH-8765-4321") // a negative n masks nothing
+    // scalastyle:off nonascii
+    checkEvaluation(MaskFirstN(Literal("Ul9U"), 2, "\u2200", null, null), "\u2200x9U")
+    // scalastyle:on nonascii
+  }
+
+  test("mask_last_n") {
+    checkEvaluation(MaskLastN(Literal("abcd-EFGH-8765"), 6, "U", "l", "#"),
+      "abcd-EFGU-####")
+    checkEvaluation(new MaskLastN(
+      Literal("abcd-EFGH-8765"), Literal(6), Literal("U"), Literal("l"), Literal("#")),
+      "abcd-EFGU-####")
+    checkEvaluation(
+      new MaskLastN(Literal("abcd-EFGH-8765"), Literal(6), Literal("U"), Literal("l")),
+      "abcd-EFGU-nnnn")
+    checkEvaluation(
+      new MaskLastN(Literal("abcd-EFGH-8765"), Literal(6), Literal("U")),
+      "abcd-EFGU-nnnn")
+    checkEvaluation(
+      new MaskLastN(Literal("abcd-EFGH-8765"), Literal(6)),
+      "abcd-EFGX-nnnn")
+    intercept[AnalysisException] {
+      checkEvaluation(new MaskLastN(Literal("abcd-EFGH-8765"), Literal("U")), "")
+    }
+    checkEvaluation(new MaskLastN(Literal("abcd-EFGH-8765-4321")), "abcd-EFGH-8765-nnnn")
+    checkEvaluation(new MaskLastN(Literal(null, StringType)), null)
+    checkEvaluation(MaskLastN(Literal("abcd-EFGH-8765-4321"), 4, "U", "l", null),
+      "abcd-EFGH-8765-nnnn")
+    checkEvaluation(new MaskLastN(
+      Literal("abcd-EFGH-8765-4321"),
+      Literal(null, IntegerType),
+      Literal(null, StringType),
+      Literal(null, StringType),
+      Literal(null, StringType)), "abcd-EFGH-8765-nnnn")
+    checkEvaluation(new MaskLastN(Literal("abcd-EFGH-8765-4321"), Literal(12), Literal("Upper")),
+      "abcd-EFUU-nnnn-nnnn")
+    checkEvaluation(new MaskLastN(Literal("")), "")
+    checkEvaluation(new MaskLastN(Literal("abcd-EFGH-8765-4321"), Literal(16), Literal("")),
+      "abcx-XXXX-nnnn-nnnn")
+    checkEvaluation(MaskLastN(Literal("abcd-EFGH-8765-4321"), 1000, "", "", ""),
+      "xxxx-XXXX-nnnn-nnnn")
+    checkEvaluation(MaskLastN(Literal("abcd-EFGH-8765-4321"), -1, "", "", ""),
+      "abcd-EFGH-8765-4321")
+    // scalastyle:off nonascii
+    checkEvaluation(MaskLastN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200")
+    // scalastyle:on nonascii
+  }
+
+  test("mask_show_first_n") {
+    checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), 6, "U", "l", "#"),
+      "abcd-EUUU-####-####")
+    checkEvaluation(new MaskShowFirstN(
+      Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")),
+      "abcd-EUUU-####-####")
+    checkEvaluation(
+      new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l")),
+      "abcd-EUUU-nnnn-nnnn")
+    checkEvaluation(new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U")),
+      "abcd-EUUU-nnnn-nnnn")
+    checkEvaluation(new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6)),
+      "abcd-EXXX-nnnn-nnnn")
+    intercept[AnalysisException] {
+      checkEvaluation(new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal("U")), "")
+    }
+    checkEvaluation(new MaskShowFirstN(Literal("abcd-EFGH-8765-4321")), "abcd-XXXX-nnnn-nnnn")
+    checkEvaluation(new MaskShowFirstN(Literal(null, StringType)), null)
+    checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), 4, "U", "l", null),
+      "abcd-UUUU-nnnn-nnnn")
+    checkEvaluation(new MaskShowFirstN(
+      Literal("abcd-EFGH-8765-4321"),
+      Literal(null, IntegerType),
+      Literal(null, StringType),
+      Literal(null, StringType),
+      Literal(null, StringType)), "abcd-XXXX-nnnn-nnnn")
+    checkEvaluation(
+      new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("Upper")),
+      "abcd-EUUU-nnnn-nnnn")
+    checkEvaluation(new MaskShowFirstN(Literal("")), "")
+    checkEvaluation(new MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), Literal(4), Literal("")),
+      "abcd-XXXX-nnnn-nnnn")
+    checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), 1000, "", "", ""),
+      "abcd-EFGH-8765-4321") // n beyond the input length shows the whole input
+    checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), -1, "", "", ""),
+      "xxxx-XXXX-nnnn-nnnn") // a negative n shows nothing
+    // scalastyle:off nonascii
+    checkEvaluation(MaskShowFirstN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200")
+    // scalastyle:on nonascii
+  }
+
+  test("mask_show_last_n") {
+    checkEvaluation(MaskShowLastN(Literal("abcd-EFGH-8765"), 6, "U", "l", "#"),
+      "llll-UUUH-8765")
+    checkEvaluation(new MaskShowLastN(
+      Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")),
+      "llll-UUUU-###5-4321")
+    checkEvaluation(
+      new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l")),
+      "llll-UUUU-nnn5-4321")
+    checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U")),
+      "xxxx-UUUU-nnn5-4321")
+    checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal(6)),
+      "xxxx-XXXX-nnn5-4321")
+    intercept[AnalysisException] {
+      checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal("U")), "")
+    }
+    checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321")), "xxxx-XXXX-nnnn-4321")
+    checkEvaluation(new MaskShowLastN(Literal(null, StringType)), null)
+    checkEvaluation(MaskShowLastN(Literal("abcd-EFGH-8765-4321"), 4, "U", "l", null),
+      "llll-UUUU-nnnn-4321")
+    checkEvaluation(new MaskShowLastN(
+      Literal("abcd-EFGH-8765-4321"),
+      Literal(null, IntegerType),
+      Literal(null, StringType),
+      Literal(null, StringType),
+      Literal(null, StringType)), "xxxx-XXXX-nnnn-4321")
+    checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("Upper")),
+      "xxxx-UUUU-nnn5-4321")
+    checkEvaluation(new MaskShowLastN(Literal("")), "")
+    checkEvaluation(new MaskShowLastN(Literal("abcd-EFGH-8765-4321"), Literal(4), Literal("")),
+      "xxxx-XXXX-nnnn-4321")
+    checkEvaluation(MaskShowLastN(Literal("abcd-EFGH-8765-4321"), 1000, "", "", ""),
+      "abcd-EFGH-8765-4321")
+    checkEvaluation(MaskShowLastN(Literal("abcd-EFGH-8765-4321"), -1, "", "", ""),
+      "xxxx-XXXX-nnnn-nnnn")
+    // scalastyle:off nonascii
+    checkEvaluation(MaskShowLastN(Literal("Ul9U"), 2, "\u2200", null, null), "\u2200x9U")
+    // scalastyle:on nonascii
+  }
+
+  test("mask_hash") {
+    checkEvaluation(MaskHash(Literal("abcd-EFGH-8765-4321")), "60c713f5ec6912229d2060df1c322776")
+    checkEvaluation(MaskHash(Literal("")), "d41d8cd98f00b204e9800998ecf8427e")
+    checkEvaluation(MaskHash(Literal(null, StringType)), null)
+    // scalastyle:off nonascii
+    checkEvaluation(MaskHash(Literal("\u2200x9U")), "f1243ef123d516b1f32a3a75309e5711")
+    // scalastyle:on nonascii
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c658f25ced053..c82b4d50fbd05 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3314,6 +3314,125 @@ object functions { */ def map_values(e: Column): Column = withExpr { MapValues(e.expr) } + ////////////////////////////////////////////////////////////////////////////////////////////// + // Mask functions + ////////////////////////////////////////////////////////////////////////////////////////////// + /** + * Returns a string which is the masked representation of the input. + * Uppercase letters are replaced with `X`, lowercase letters with `x` and digits with `n`; + * any other character is left unchanged. A null input yields null. + * @param e column holding the string to mask + * @group mask_funcs + * @since 2.4.0 + */ + def mask(e: Column): Column = withExpr { new Mask(e.expr) } + + /** + * Returns a string which is the masked representation of the input, using `upper`, `lower` and + * `digit` as replacement characters. + * Each replacement is reduced to a single character: the first character of the string, or + * the default (`X`, `x` and `n` respectively) when the string is null or empty. Any other + * kind of character is left unchanged. + * @param e column holding the string to mask + * @param upper replacement for uppercase letters + * @param lower replacement for lowercase letters + * @param digit replacement for digits + * @group mask_funcs + * @since 2.4.0 + */ + def mask(e: Column, upper: String, lower: String, digit: String): Column = withExpr { + Mask(e.expr, upper, lower, digit) + } + + /** + * Returns a string with the first `n` characters masked. + * Masked uppercase letters become `X`, lowercase letters `x` and digits `n`; any other + * character, and everything after the first `n` characters, is left unchanged. When `n` + * exceeds the length of the string the whole string is masked. + * @param e column holding the string to mask + * @param n number of leading characters to mask + * @group mask_funcs + * @since 2.4.0 + */ + def mask_first_n(e: Column, n: Int): Column = withExpr { new MaskFirstN(e.expr, Literal(n)) } + + /** + * Returns a string with the first `n` characters masked, using `upper`, `lower` and `digit` as + * replacement characters. + * A null or empty replacement string selects the default (`X`, `x` and `n` respectively). + * When `n` exceeds the length of the string the whole string is masked; in the expression + * tests a negative `n` leaves the string unchanged. + * @param e column holding the string to mask + * @param n number of leading characters to mask + * @param upper replacement for uppercase letters + * @param lower replacement for lowercase letters + * @param digit replacement for digits + * @group mask_funcs + * @since 2.4.0 + */ + def mask_first_n( + e: Column, + n: Int, + upper: String, + lower: String, + digit: String): Column = withExpr { + MaskFirstN(e.expr, n, upper, lower, digit) + } + + /** + * Returns a string with the last `n` characters masked. + * Masked uppercase letters become `X`, lowercase letters `x` and digits `n`; any other + * character, and everything before the last `n` characters, is left unchanged. When `n` + * is at least the length of the string the whole string is masked. + * @param e column holding the string to mask + * @param n number of trailing characters to mask + * @group mask_funcs + * @since 2.4.0 + */ + def mask_last_n(e: Column, n: Int): Column = withExpr { new MaskLastN(e.expr, Literal(n)) } + + /** + * Returns a string with the last `n` characters masked, using `upper`, `lower` and `digit` as + * replacement characters. + * A null or empty replacement string selects the default (`X`, `x` and `n` respectively). + * When `n` is at least the length of the string the whole string is masked; in the + * expression tests a negative `n` leaves the string unchanged. + * @param e column holding the string to mask + * @param n number of trailing characters to mask + * @param upper replacement for uppercase letters + * @param lower replacement for lowercase letters + * @param digit replacement for digits
+ * @group mask_funcs + * @since 2.4.0 + */ + def mask_last_n( + e: Column, + n: Int, + upper: String, + lower: String, + digit: String): Column = withExpr { + MaskLastN(e.expr, n, upper, lower, digit) + } + + /** + * Returns a string with all but the first `n` characters masked. + * The first `n` characters are shown as they are; in the rest, uppercase letters become `X`, + * lowercase letters `x` and digits `n`, while any other character is left unchanged. When + * `n` is at least the length of the string nothing is masked. + * @param e column holding the string to mask + * @param n number of leading characters to leave visible + * @group mask_funcs + * @since 2.4.0 + */ + def mask_show_first_n(e: Column, n: Int): Column = withExpr { + new MaskShowFirstN(e.expr, Literal(n)) + } + + /** + * Returns a string with all but the first `n` characters masked, using `upper`, `lower` and + * `digit` as replacement characters. + * A null or empty replacement string selects the default (`X`, `x` and `n` respectively). + * When `n` is at least the length of the string nothing is masked; in the expression tests + * a negative `n` masks the whole string. + * @param e column holding the string to mask + * @param n number of leading characters to leave visible + * @param upper replacement for uppercase letters + * @param lower replacement for lowercase letters + * @param digit replacement for digits + * @group mask_funcs + * @since 2.4.0 + */ + def mask_show_first_n( + e: Column, + n: Int, + upper: String, + lower: String, + digit: String): Column = withExpr { + MaskShowFirstN(e.expr, n, upper, lower, digit) + } + + /** + * Returns a string with all but the last `n` characters masked. + * The last `n` characters are shown as they are; in the rest, uppercase letters become `X`, + * lowercase letters `x` and digits `n`, while any other character is left unchanged. When + * `n` is at least the length of the string nothing is masked. + * @param e column holding the string to mask + * @param n number of trailing characters to leave visible + * @group mask_funcs + * @since 2.4.0 + */ + def mask_show_last_n(e: Column, n: Int): Column = withExpr { + new MaskShowLastN(e.expr, Literal(n)) + } + + /** + * Returns a string with all but the last `n` characters masked, using `upper`, `lower` and + * `digit` as replacement characters. + * A null or empty replacement string selects the default (`X`, `x` and `n` respectively). + * When `n` is at least the length of the string nothing is masked; in the expression tests + * a negative `n` masks the whole string. + * @param e column holding the string to mask + * @param n number of trailing characters to leave visible + * @param upper replacement for uppercase letters + * @param lower replacement for lowercase letters + * @param digit replacement for digits + * @group mask_funcs + * @since 2.4.0 + */ + def mask_show_last_n( + e: Column, + n: Int, + upper: String, + lower: String, + digit: String): Column = withExpr { + MaskShowLastN(e.expr, n, upper, lower, digit) + } + + /** + * Returns a hashed value based on the input column. + * The expected values in the test suites are 32-character hexadecimal MD5 digests (confirm + * against `MaskHash`); a null input yields null. + * @param e column holding the string to hash
+ * @group mask_funcs + * @since 2.4.0 + */ + def mask_hash(e: Column): Column = withExpr { MaskHash(e.expr) } + // scalastyle:off line.size.limit // scalastyle:off parameter.number diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 50e475984f458..74338fbe15d49 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -276,6 +276,103 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { ) } + test("mask functions") { + val df = Seq("TestString-123", "", null).toDF("a") + checkAnswer(df.select(mask($"a")), Seq(Row("XxxxXxxxxx-nnn"), Row(""), Row(null))) + checkAnswer(df.select(mask_first_n($"a", 4)), Seq(Row("XxxxString-123"), Row(""), Row(null))) + checkAnswer(df.select(mask_last_n($"a", 4)), Seq(Row("TestString-nnn"), Row(""), Row(null))) + checkAnswer(df.select(mask_show_first_n($"a", 4)), + Seq(Row("TestXxxxxx-nnn"), Row(""), Row(null))) + checkAnswer(df.select(mask_show_last_n($"a", 4)), + Seq(Row("XxxxXxxxxx-123"), Row(""), Row(null))) + checkAnswer(df.select(mask_hash($"a")), + Seq(Row("dd78d68ad1b23bde126812482dd70ac6"), + Row("d41d8cd98f00b204e9800998ecf8427e"), + Row(null))) + + checkAnswer(df.select(mask($"a", "U", "l", "#")), + Seq(Row("UlllUlllll-###"), Row(""), Row(null))) + checkAnswer(df.select(mask_first_n($"a", 4, "U", "l", "#")), + Seq(Row("UlllString-123"), Row(""), Row(null))) + checkAnswer(df.select(mask_last_n($"a", 4, "U", "l", "#")), + Seq(Row("TestString-###"), Row(""), Row(null))) + checkAnswer(df.select(mask_show_first_n($"a", 4, "U", "l", "#")), + Seq(Row("TestUlllll-###"), Row(""), Row(null))) + checkAnswer(df.select(mask_show_last_n($"a", 4, "U", "l", "#")), + Seq(Row("UlllUlllll-123"), Row(""), Row(null))) + + checkAnswer( + df.selectExpr("mask(a)", "mask(a, 'U')", "mask(a, 'U', 
'l')", "mask(a, 'U', 'l', '#')"), + Seq(Row("XxxxXxxxxx-nnn", "UxxxUxxxxx-nnn", "UlllUlllll-nnn", "UlllUlllll-###"), + Row("", "", "", ""), + Row(null, null, null, null))) + checkAnswer(sql("select mask(null)"), Row(null)) + checkAnswer(sql("select mask('AAaa11', null, null, null)"), Row("XXxxnn")) + checkAnswer(df.selectExpr("mask(a, a)"), Seq(Row("XxxxXxxxxx-nnn"), Row(""), Row(null))) + + checkAnswer( + df.selectExpr( + "mask_first_n(a)", + "mask_first_n(a, 6)", + "mask_first_n(a, 6, 'U')", + "mask_first_n(a, 6, 'U', 'l')", + "mask_first_n(a, 6, 'U', 'l', '#')"), + Seq(Row("XxxxString-123", "XxxxXxring-123", "UxxxUxring-123", "UlllUlring-123", + "UlllUlring-123"), + Row("", "", "", "", ""), + Row(null, null, null, null, null))) + checkAnswer(sql("select mask_first_n(null)"), Row(null)) + checkAnswer(sql("select mask_first_n('A1aA1a', null, null, null, null)"), Row("XnxX1a")) + checkAnswer(spark.range(1).selectExpr("mask_first_n('A1aA1a', id)"), Row("XnxX1a")) + + checkAnswer( + df.selectExpr( + "mask_last_n(a)", + "mask_last_n(a, 6)", + "mask_last_n(a, 6, 'U')", + "mask_last_n(a, 6, 'U', 'l')", + "mask_last_n(a, 6, 'U', 'l', '#')"), + Seq(Row("TestString-nnn", "TestStrixx-nnn", "TestStrixx-nnn", "TestStrill-nnn", + "TestStrill-###"), + Row("", "", "", "", ""), + Row(null, null, null, null, null))) + checkAnswer(sql("select mask_last_n(null)"), Row(null)) + checkAnswer(sql("select mask_last_n('A1aA1a', null, null, null, null)"), Row("A1xXnx")) + checkAnswer(spark.range(1).selectExpr("mask_last_n('A1aA1a', id)"), Row("A1xXnx")) + + checkAnswer( + df.selectExpr( + "mask_show_first_n(a)", + "mask_show_first_n(a, 6)", + "mask_show_first_n(a, 6, 'U')", + "mask_show_first_n(a, 6, 'U', 'l')", + "mask_show_first_n(a, 6, 'U', 'l', '#')"), + Seq(Row("TestXxxxxx-nnn", "TestStxxxx-nnn", "TestStxxxx-nnn", "TestStllll-nnn", + "TestStllll-###"), + Row("", "", "", "", ""), + Row(null, null, null, null, null))) + checkAnswer(sql("select mask_show_first_n(null)"), Row(null)) + 
checkAnswer(sql("select mask_show_first_n('A1aA1a', null, null, null, null)"), Row("A1aAnx")) + checkAnswer(spark.range(1).selectExpr("mask_show_first_n('A1aA1a', id)"), Row("A1aAnx")) + + checkAnswer( + df.selectExpr( + "mask_show_last_n(a)", + "mask_show_last_n(a, 6)", + "mask_show_last_n(a, 6, 'U')", + "mask_show_last_n(a, 6, 'U', 'l')", + "mask_show_last_n(a, 6, 'U', 'l', '#')"), + Seq(Row("XxxxXxxxxx-123", "XxxxXxxxng-123", "UxxxUxxxng-123", "UlllUlllng-123", + "UlllUlllng-123"), + Row("", "", "", "", ""), + Row(null, null, null, null, null))) + checkAnswer(sql("select mask_show_last_n(null)"), Row(null)) + checkAnswer(sql("select mask_show_last_n('A1aA1a', null, null, null, null)"), Row("XnaA1a")) + checkAnswer(spark.range(1).selectExpr("mask_show_last_n('A1aA1a', id)"), Row("XnaA1a")) + + checkAnswer(sql("select mask_hash(null)"), Row(null)) + } + test("sort_array function") { val df = Seq( (Array[Int](2, 1, 3), Array("b", "c", "a")), From b15bbf1ef02c5c4c4cc4c739826da894898d5771 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Sat, 5 May 2018 17:21:23 +0200 Subject: [PATCH 2/8] fix scalastyle --- .../spark/sql/catalyst/expressions/maskExpressions.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index fd0d327bc97d1..a8d76c69e48ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.commons.codec.digest.DigestUtils + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenerator, 
CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String From e9da798b8be49221f357cc3d7082d3b9e55f572a Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 9 May 2018 15:25:57 +0200 Subject: [PATCH 3/8] address review comments --- .../expressions/maskExpressions.scala | 115 ++++++------------ 1 file changed, 40 insertions(+), 75 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index a8d76c69e48ee..65eb9e449cf14 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -21,17 +21,13 @@ import org.apache.commons.codec.digest.DigestUtils import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.MaskExpressionsUtils._ +import org.apache.spark.sql.catalyst.expressions.MaskLike._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String trait MaskLike { - val defaultMaskedUppercase: Int = 'X' - val defaultMaskedLowercase: Int = 'x' - val defaultMaskedDigit: Int = 'n' - val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL - def upper: String def lower: String def digit: String @@ -83,6 +79,10 @@ trait MaskLikeWithN extends MaskLike { */ object MaskLike { val defaultCharCount = 4 + val defaultMaskedUppercase: Int = 'X' + val defaultMaskedLowercase: Int = 'x' + val defaultMaskedDigit: Int = 'n' + val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL def extractCharCount(e: Expression): Int = e match { case Literal(i, IntegerType|NullType) 
=> @@ -119,16 +119,13 @@ case class Mask(child: Expression, upper: String, lower: String, digit: String) def this(child: Expression) = this(child, null.asInstanceOf[String], null, null) def this(child: Expression, upper: Expression) = - this(child, MaskLike.extractReplacement(upper), null, null) + this(child, extractReplacement(upper), null, null) def this(child: Expression, upper: Expression, lower: Expression) = - this(child, MaskLike.extractReplacement(upper), MaskLike.extractReplacement(lower), null) + this(child, extractReplacement(upper), extractReplacement(lower), null) def this(child: Expression, upper: Expression, lower: Expression, digit: Expression) = - this(child, - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - MaskLike.extractReplacement(digit)) + this(child, extractReplacement(upper), extractReplacement(lower), extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { val res = input.asInstanceOf[UTF8String].toString.map(transformChar( @@ -176,24 +173,16 @@ case class MaskFirstN( extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN { def this(child: Expression) = - this(child, MaskLike.defaultCharCount, null, null, null) + this(child, defaultCharCount, null, null, null) def this(child: Expression, n: Expression) = - this(child, MaskLike.extractCharCount(n), null, null, null) + this(child, extractCharCount(n), null, null, null) def this(child: Expression, n: Expression, upper: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - null, - null) + this(child, extractCharCount(n), extractReplacement(upper), null, null) def this(child: Expression, n: Expression, upper: Expression, lower: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - null) + this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null) def this( child: Expression, @@ 
-202,10 +191,10 @@ case class MaskFirstN( lower: Expression, digit: Expression) = this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - MaskLike.extractReplacement(digit)) + extractCharCount(n), + extractReplacement(upper), + extractReplacement(lower), + extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { val inputString = input.asInstanceOf[UTF8String].toString @@ -262,24 +251,16 @@ case class MaskLastN( extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN { def this(child: Expression) = - this(child, MaskLike.defaultCharCount, null, null, null) + this(child, defaultCharCount, null, null, null) def this(child: Expression, n: Expression) = - this(child, MaskLike.extractCharCount(n), null, null, null) + this(child, extractCharCount(n), null, null, null) def this(child: Expression, n: Expression, upper: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - null, - null) + this(child, extractCharCount(n), extractReplacement(upper), null, null) def this(child: Expression, n: Expression, upper: Expression, lower: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - null) + this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null) def this( child: Expression, @@ -288,10 +269,10 @@ case class MaskLastN( lower: Expression, digit: Expression) = this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - MaskLike.extractReplacement(digit)) + extractCharCount(n), + extractReplacement(upper), + extractReplacement(lower), + extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { val inputString = input.asInstanceOf[UTF8String].toString @@ -348,24 +329,16 @@ case class MaskShowFirstN( extends UnaryExpression with ExpectsInputTypes with 
MaskLikeWithN { def this(child: Expression) = - this(child, MaskLike.defaultCharCount, null, null, null) + this(child, defaultCharCount, null, null, null) def this(child: Expression, n: Expression) = - this(child, MaskLike.extractCharCount(n), null, null, null) + this(child, extractCharCount(n), null, null, null) def this(child: Expression, n: Expression, upper: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - null, - null) + this(child, extractCharCount(n), extractReplacement(upper), null, null) def this(child: Expression, n: Expression, upper: Expression, lower: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - null) + this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null) def this( child: Expression, @@ -374,10 +347,10 @@ case class MaskShowFirstN( lower: Expression, digit: Expression) = this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - MaskLike.extractReplacement(digit)) + extractCharCount(n), + extractReplacement(upper), + extractReplacement(lower), + extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { val inputString = input.asInstanceOf[UTF8String].toString @@ -434,24 +407,16 @@ case class MaskShowLastN( extends UnaryExpression with ExpectsInputTypes with MaskLikeWithN { def this(child: Expression) = - this(child, MaskLike.defaultCharCount, null, null, null) + this(child, defaultCharCount, null, null, null) def this(child: Expression, n: Expression) = - this(child, MaskLike.extractCharCount(n), null, null, null) + this(child, extractCharCount(n), null, null, null) def this(child: Expression, n: Expression, upper: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - null, - null) + this(child, extractCharCount(n), extractReplacement(upper), null, 
null) def this(child: Expression, n: Expression, upper: Expression, lower: Expression) = - this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - null) + this(child, extractCharCount(n), extractReplacement(upper), extractReplacement(lower), null) def this( child: Expression, @@ -460,10 +425,10 @@ case class MaskShowLastN( lower: Expression, digit: Expression) = this(child, - MaskLike.extractCharCount(n), - MaskLike.extractReplacement(upper), - MaskLike.extractReplacement(lower), - MaskLike.extractReplacement(digit)) + extractCharCount(n), + extractReplacement(upper), + extractReplacement(lower), + extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { val inputString = input.asInstanceOf[UTF8String].toString From ae89d00c52ffeab937456aa444bce299f834d1c2 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 11 May 2018 18:21:32 +0200 Subject: [PATCH 4/8] support non-ascii chars --- .../expressions/MaskExpressionsUtils.java | 2 +- .../expressions/maskExpressions.scala | 184 ++++++++++++------ .../expressions/MaskExpressionsSuite.scala | 10 + 3 files changed, 138 insertions(+), 58 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java index b88b12df4531a..02f3a3f0c5343 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java @@ -73,7 +73,7 @@ public static int transformChar( */ public static int getReplacementChar(String rep, int def) { if (rep != null && rep.length() > 0) { - return rep.charAt(0); + return rep.codePointAt(0); } return def; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index 65eb9e449cf14..4ce396ea9d4a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -38,18 +38,25 @@ trait MaskLike { protected val maskUtilsClassName: String = classOf[MaskExpressionsUtils].getName - def maskAndAppendToStringBuilderCode( + def inputStringLengthCode(inputString: String, length: String): String = { + s"${CodeGenerator.JAVA_INT} $length = $inputString.codePointCount(0, $inputString.length());" + } + + def appendMaskedToStringBuilderCode( ctx: CodegenContext, sb: String, inputString: String, - start: String, - end: String): String = { + offset: String, + numChars: String): String = { val i = ctx.freshName("i") + val codePoint = ctx.freshName("codePoint") s""" - |for (${CodeGenerator.JAVA_INT} $i = $start; $i < $end; $i ++) { - | $sb.appendCodePoint($maskUtilsClassName.transformChar($inputString.charAt($i), + |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) { + | ${CodeGenerator.JAVA_INT} $codePoint = $inputString.codePointAt($offset); + | $sb.appendCodePoint($maskUtilsClassName.transformChar($codePoint, | $upperReplacement, $lowerReplacement, | $digitReplacement, $defaultMaskedOther)); + | $offset += Character.charCount($codePoint); |} """.stripMargin } @@ -58,15 +65,51 @@ trait MaskLike { ctx: CodegenContext, sb: String, inputString: String, - start: String, - end: String): String = { + offset: String, + numChars: String): String = { val i = ctx.freshName("i") + val codePoint = ctx.freshName("codePoint") s""" - |for (${CodeGenerator.JAVA_INT} $i = $start; $i < $end; $i ++) { - | $sb.appendCodePoint($inputString.charAt($i)); + |for (${CodeGenerator.JAVA_INT} $i = 0; $i < $numChars; $i++) { + | ${CodeGenerator.JAVA_INT} $codePoint = $inputString.codePointAt($offset); + | 
$sb.appendCodePoint($codePoint); + | $offset += Character.charCount($codePoint); |} """.stripMargin } + + def appendMaskedToStringBuffer( + sb: StringBuffer, + inputString: String, + startOffset: Int, + numChars: Int): Int = { + var offset = startOffset + (1 to numChars) foreach { _ => + val codePoint = inputString.codePointAt(offset) + sb.appendCodePoint(transformChar( + codePoint, + upperReplacement, + lowerReplacement, + digitReplacement, + defaultMaskedOther)) + offset += Character.charCount(codePoint) + } + offset + } + + def appendUnchangedToStringBuffer( + sb: StringBuffer, + inputString: String, + startOffset: Int, + numChars: Int): Int = { + var offset = startOffset + (1 to numChars) foreach { _ => + val codePoint = inputString.codePointAt(offset) + sb.appendCodePoint(codePoint) + offset += Character.charCount(codePoint) + } + offset + } } trait MaskLikeWithN extends MaskLike { @@ -128,21 +171,27 @@ case class Mask(child: Expression, upper: String, lower: String, digit: String) this(child, extractReplacement(upper), extractReplacement(lower), extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { - val res = input.asInstanceOf[UTF8String].toString.map(transformChar( - _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar) - UTF8String.fromString(res) + val str = input.asInstanceOf[UTF8String].toString + val length = str.codePointCount(0, str.length()) + val sb = new StringBuffer(length) + appendMaskedToStringBuffer(sb, str, 0, length) + UTF8String.fromString(sb.toString) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (input: String) => { val sb = ctx.freshName("sb") + val length = ctx.freshName("length") + val offset = ctx.freshName("offset") val inputString = ctx.freshName("inputString") s""" |String $inputString = $input.toString(); - |StringBuilder $sb = new StringBuilder($inputString.length()); - |${maskAndAppendToStringBuilderCode(ctx, sb, 
inputString, "0", s"$inputString.length()")} + |${inputStringLengthCode(inputString, length)} + |StringBuilder $sb = new StringBuilder($length); + |${CodeGenerator.JAVA_INT} $offset = 0; + |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, length)} |${ev.value} = UTF8String.fromString($sb.toString()); - |""".stripMargin + """.stripMargin }) } @@ -197,26 +246,31 @@ case class MaskFirstN( extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { - val inputString = input.asInstanceOf[UTF8String].toString - val (firstN, others) = inputString.splitAt(charCount) - val transformed = firstN.map(transformChar( - _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar) - UTF8String.fromString(transformed + others) + val str = input.asInstanceOf[UTF8String].toString + val length = str.codePointCount(0, str.length()) + val endOfMask = if (charCount > length) length else charCount + val sb = new StringBuffer(length) + val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) + appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) + UTF8String.fromString(sb.toString) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (input: String) => { val sb = ctx.freshName("sb") + val length = ctx.freshName("length") + val offset = ctx.freshName("offset") val inputString = ctx.freshName("inputString") val endOfMask = ctx.freshName("endOfMask") s""" |String $inputString = $input.toString(); - |${CodeGenerator.JAVA_INT} $endOfMask = $charCount > $inputString.length() ? - | $inputString.length() : $charCount; - |StringBuilder $sb = new StringBuilder($inputString.length()); - |${maskAndAppendToStringBuilderCode(ctx, sb, inputString, "0", endOfMask)} + |${inputStringLengthCode(inputString, length)} + |${CodeGenerator.JAVA_INT} $endOfMask = $charCount > $length ? 
$length : $charCount; + |${CodeGenerator.JAVA_INT} $offset = 0; + |StringBuilder $sb = new StringBuilder($length); + |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, endOfMask)} |${appendUnchangedToStringBuilderCode( - ctx, sb, inputString, endOfMask, s"$inputString.length()")} + ctx, sb, inputString, offset, s"$length - $endOfMask")} |${ev.value} = UTF8String.fromString($sb.toString()); |""".stripMargin }) @@ -275,26 +329,32 @@ case class MaskLastN( extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { - val inputString = input.asInstanceOf[UTF8String].toString - val (others, lastN) = inputString.splitAt(inputString.length - charCount) - val transformed = lastN.map(transformChar( - _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar) - UTF8String.fromString(others + transformed) + val str = input.asInstanceOf[UTF8String].toString + val length = str.codePointCount(0, str.length()) + val startOfMask = if (charCount >= length) 0 else length - charCount + val sb = new StringBuffer(length) + val offset = appendUnchangedToStringBuffer(sb, str, 0, startOfMask) + appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) + UTF8String.fromString(sb.toString) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (input: String) => { val sb = ctx.freshName("sb") + val length = ctx.freshName("length") + val offset = ctx.freshName("offset") val inputString = ctx.freshName("inputString") val startOfMask = ctx.freshName("startOfMask") s""" |String $inputString = $input.toString(); - |${CodeGenerator.JAVA_INT} $startOfMask = $charCount >= $inputString.length() ? 
- | 0 : $inputString.length() - $charCount; - |StringBuilder $sb = new StringBuilder($inputString.length()); - |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, "0", startOfMask)} - |${maskAndAppendToStringBuilderCode( - ctx, sb, inputString, startOfMask, s"$inputString.length()")} + |${inputStringLengthCode(inputString, length)} + |${CodeGenerator.JAVA_INT} $startOfMask = $charCount >= $length ? + | 0 : $length - $charCount; + |${CodeGenerator.JAVA_INT} $offset = 0; + |StringBuilder $sb = new StringBuilder($length); + |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, offset, startOfMask)} + |${appendMaskedToStringBuilderCode( + ctx, sb, inputString, offset, s"$length - $startOfMask")} |${ev.value} = UTF8String.fromString($sb.toString()); |""".stripMargin }) @@ -353,26 +413,31 @@ case class MaskShowFirstN( extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { - val inputString = input.asInstanceOf[UTF8String].toString - val (firstN, others) = inputString.splitAt(charCount) - val transformed = others.map(transformChar( - _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar) - UTF8String.fromString(firstN + transformed) + val str = input.asInstanceOf[UTF8String].toString + val length = str.codePointCount(0, str.length()) + val startOfMask = if (charCount > length) length else charCount + val sb = new StringBuffer(length) + val offset = appendUnchangedToStringBuffer(sb, str, 0, startOfMask) + appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) + UTF8String.fromString(sb.toString) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (input: String) => { val sb = ctx.freshName("sb") + val length = ctx.freshName("length") + val offset = ctx.freshName("offset") val inputString = ctx.freshName("inputString") val startOfMask = ctx.freshName("startOfMask") s""" |String $inputString = $input.toString(); - |${CodeGenerator.JAVA_INT} 
$startOfMask = $charCount > $inputString.length() ? - | $inputString.length() : $charCount; - |StringBuilder $sb = new StringBuilder($inputString.length()); - |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, "0", startOfMask)} - |${maskAndAppendToStringBuilderCode( - ctx, sb, inputString, startOfMask, s"$inputString.length()")} + |${inputStringLengthCode(inputString, length)} + |${CodeGenerator.JAVA_INT} $startOfMask = $charCount > $length ? $length : $charCount; + |${CodeGenerator.JAVA_INT} $offset = 0; + |StringBuilder $sb = new StringBuilder($length); + |${appendUnchangedToStringBuilderCode(ctx, sb, inputString, offset, startOfMask)} + |${appendMaskedToStringBuilderCode( + ctx, sb, inputString, offset, s"$length - $startOfMask")} |${ev.value} = UTF8String.fromString($sb.toString()); |""".stripMargin }) @@ -431,26 +496,31 @@ case class MaskShowLastN( extractReplacement(digit)) override def nullSafeEval(input: Any): Any = { - val inputString = input.asInstanceOf[UTF8String].toString - val (others, lastN) = inputString.splitAt(inputString.length - charCount) - val transformed = others.map(transformChar( - _, upperReplacement, lowerReplacement, digitReplacement, defaultMaskedOther).toChar) - UTF8String.fromString(transformed + lastN) + val str = input.asInstanceOf[UTF8String].toString + val length = str.codePointCount(0, str.length()) + val endOfMask = if (charCount >= length) 0 else length - charCount + val sb = new StringBuffer(length) + val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) + appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) + UTF8String.fromString(sb.toString) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (input: String) => { val sb = ctx.freshName("sb") + val length = ctx.freshName("length") + val offset = ctx.freshName("offset") val inputString = ctx.freshName("inputString") val endOfMask = ctx.freshName("endOfMask") s""" |String $inputString = 
$input.toString(); - |${CodeGenerator.JAVA_INT} $endOfMask = $charCount >= $inputString.length() ? - | 0 : $inputString.length() - $charCount; - |StringBuilder $sb = new StringBuilder($inputString.length()); - |${maskAndAppendToStringBuilderCode(ctx, sb, inputString, "0", endOfMask)} + |${inputStringLengthCode(inputString, length)} + |${CodeGenerator.JAVA_INT} $endOfMask = $charCount >= $length ? 0 : $length - $charCount; + |${CodeGenerator.JAVA_INT} $offset = 0; + |StringBuilder $sb = new StringBuilder($length); + |${appendMaskedToStringBuilderCode(ctx, sb, inputString, offset, endOfMask)} |${appendUnchangedToStringBuilderCode( - ctx, sb, inputString, endOfMask, s"$inputString.length()")} + ctx, sb, inputString, offset, s"$length - $endOfMask")} |${ev.value} = UTF8String.fromString($sb.toString()); |""".stripMargin }) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala index 088c89d8335df..45ed081af35ac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala @@ -46,6 +46,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Mask(Literal("abcd-EFGH-8765-4321"), "", "", ""), "xxxx-XXXX-nnnn-nnnn") // scalastyle:off nonascii checkEvaluation(Mask(Literal("Ul9U"), "\u2200", null, null), "\u2200xn\u2200") + checkEvaluation(new Mask(Literal("Hello World, こんにちは, 𠀋"), Literal("あ"), Literal("𡈽")), + "あ𡈽𡈽𡈽𡈽 あ𡈽𡈽𡈽𡈽, こんにちは, 𠀋") // scalastyle:on nonascii intercept[AnalysisException] { checkEvaluation(new Mask(Literal(""), Literal(1)), "") @@ -89,6 +91,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "abcd-EFGH-8765-4321") // scalastyle:off nonascii checkEvaluation(MaskFirstN(Literal("Ul9U"), 2, 
"\u2200", null, null), "\u2200x9U") + checkEvaluation(new MaskFirstN(Literal("あ, 𠀋, Hello World"), Literal(10)), + "あ, 𠀋, Xxxxo World") // scalastyle:on nonascii } @@ -131,6 +135,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "abcd-EFGH-8765-4321") // scalastyle:off nonascii checkEvaluation(MaskLastN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200") + checkEvaluation(new MaskLastN(Literal("あ, 𠀋, Hello World"), Literal(10)), + "あ, 𠀋, Hxxxx Xxxxx") // scalastyle:on nonascii } @@ -172,6 +178,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "xxxx-XXXX-nnnn-nnnn") // scalastyle:off nonascii checkEvaluation(MaskShowFirstN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200") + checkEvaluation(new MaskShowFirstN(Literal("あ, 𠀋, Hello World"), Literal(10)), + "あ, 𠀋, Hellx Xxxxx") // scalastyle:on nonascii } @@ -212,6 +220,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "xxxx-XXXX-nnnn-nnnn") // scalastyle:off nonascii checkEvaluation(MaskShowLastN(Literal("Ul9U"), 2, "\u2200", null, null), "\u2200x9U") + checkEvaluation(new MaskShowLastN(Literal("あ, 𠀋, Hello World"), Literal(10)), + "あ, 𠀋, Xello World") // scalastyle:on nonascii } From 06b8b6c9f4e7d82608d0507860b0dbe7acb6af54 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Mon, 14 May 2018 15:03:38 +0200 Subject: [PATCH 5/8] address comments --- .../expressions/MaskExpressionsUtils.java | 2 +- .../expressions/maskExpressions.scala | 22 +++++++++---------- .../expressions/MaskExpressionsSuite.scala | 8 +++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java index 02f3a3f0c5343..0caf0054b75ca 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java +++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java @@ -24,7 +24,7 @@ public class MaskExpressionsUtils { final static int UNMASKED_VAL = -1; /** - * + * Returns the masking character for {@param c} or {@param c} is it should not be masked. * @param c the character to transform * @param maskedUpperChar the character to use instead of a uppercase letter * @param maskedLowerChar the character to use instead of a lowercase letter diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index 4ce396ea9d4a3..cdf788d8a5d0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -79,7 +79,7 @@ trait MaskLike { } def appendMaskedToStringBuffer( - sb: StringBuffer, + sb: java.lang.StringBuilder, inputString: String, startOffset: Int, numChars: Int): Int = { @@ -98,7 +98,7 @@ trait MaskLike { } def appendUnchangedToStringBuffer( - sb: StringBuffer, + sb: java.lang.StringBuilder, inputString: String, startOffset: Int, numChars: Int): Int = { @@ -128,16 +128,16 @@ object MaskLike { val defaultMaskedOther: Int = MaskExpressionsUtils.UNMASKED_VAL def extractCharCount(e: Expression): Int = e match { - case Literal(i, IntegerType|NullType) => + case Literal(i, IntegerType | NullType) => if (i == null) defaultCharCount else i.asInstanceOf[Int] - case Literal(_, dt) => throw new AnalysisException(s"Expected literal expression of type " + + case Literal(_, dt) => throw new AnalysisException("Expected literal expression of type " + s"${IntegerType.simpleString}, but got literal of ${dt.simpleString}") case _ => defaultCharCount } def extractReplacement(e: Expression): String = e match { - case Literal(s, StringType|NullType) => if (s == null) null else 
s.toString - case Literal(_, dt) => throw new AnalysisException(s"Expected literal expression of type " + + case Literal(s, StringType | NullType) => if (s == null) null else s.toString + case Literal(_, dt) => throw new AnalysisException("Expected literal expression of type " + s"${StringType.simpleString}, but got literal of ${dt.simpleString}") case _ => null } @@ -173,7 +173,7 @@ case class Mask(child: Expression, upper: String, lower: String, digit: String) override def nullSafeEval(input: Any): Any = { val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) - val sb = new StringBuffer(length) + val sb = new java.lang.StringBuilder(length) appendMaskedToStringBuffer(sb, str, 0, length) UTF8String.fromString(sb.toString) } @@ -249,7 +249,7 @@ case class MaskFirstN( val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) val endOfMask = if (charCount > length) length else charCount - val sb = new StringBuffer(length) + val sb = new java.lang.StringBuilder(length) val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) UTF8String.fromString(sb.toString) @@ -332,7 +332,7 @@ case class MaskLastN( val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) val startOfMask = if (charCount >= length) 0 else length - charCount - val sb = new StringBuffer(length) + val sb = new java.lang.StringBuilder(length) val offset = appendUnchangedToStringBuffer(sb, str, 0, startOfMask) appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) UTF8String.fromString(sb.toString) @@ -416,7 +416,7 @@ case class MaskShowFirstN( val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) val startOfMask = if (charCount > length) length else charCount - val sb = new StringBuffer(length) + val sb = new java.lang.StringBuilder(length) val offset = 
appendUnchangedToStringBuffer(sb, str, 0, startOfMask) appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) UTF8String.fromString(sb.toString) @@ -499,7 +499,7 @@ case class MaskShowLastN( val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) val endOfMask = if (charCount >= length) 0 else length - charCount - val sb = new StringBuffer(length) + val sb = new java.lang.StringBuilder(length) val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) UTF8String.fromString(sb.toString) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala index 45ed081af35ac..ae49488a9f0bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala @@ -55,8 +55,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("mask_first_n") { - checkEvaluation(MaskFirstN(Literal("abcd-EFGH-8765-4321"), 6, "U", "l", "#"), - "llll-UFGH-8765-4321") + checkEvaluation(MaskFirstN(Literal("aB3d-EFGH-8765"), 6, "U", "l", "#"), + "lU#l-UFGH-8765") checkEvaluation(new MaskFirstN( Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")), "llll-UFGH-8765-4321") @@ -135,8 +135,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "abcd-EFGH-8765-4321") // scalastyle:off nonascii checkEvaluation(MaskLastN(Literal("Ul9U"), 2, "\u2200", null, null), "Uln\u2200") - checkEvaluation(new MaskLastN(Literal("あ, 𠀋, Hello World"), Literal(10)), - "あ, 𠀋, Hxxxx Xxxxx") + checkEvaluation(new MaskLastN(Literal("あ, 𠀋, Hello World あ 𠀋"), Literal(10)), + "あ, 𠀋, Hello Xxxxx あ 𠀋") // scalastyle:on nonascii } 
From a1f3a5bc1f26117b5f76622d4cbe3d927b3904f1 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 15 May 2018 10:17:23 +0200 Subject: [PATCH 6/8] address comments --- .../expressions/maskExpressions.scala | 22 +++++++++---------- .../expressions/MaskExpressionsSuite.scala | 12 +++++----- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index cdf788d8a5d0f..acbd0a13bba5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -78,7 +78,7 @@ trait MaskLike { """.stripMargin } - def appendMaskedToStringBuffer( + def appendMaskedToStringBuilder( sb: java.lang.StringBuilder, inputString: String, startOffset: Int, @@ -97,7 +97,7 @@ trait MaskLike { offset } - def appendUnchangedToStringBuffer( + def appendUnchangedToStringBuilder( sb: java.lang.StringBuilder, inputString: String, startOffset: Int, @@ -174,7 +174,7 @@ case class Mask(child: Expression, upper: String, lower: String, digit: String) val str = input.asInstanceOf[UTF8String].toString val length = str.codePointCount(0, str.length()) val sb = new java.lang.StringBuilder(length) - appendMaskedToStringBuffer(sb, str, 0, length) + appendMaskedToStringBuilder(sb, str, 0, length) UTF8String.fromString(sb.toString) } @@ -250,8 +250,8 @@ case class MaskFirstN( val length = str.codePointCount(0, str.length()) val endOfMask = if (charCount > length) length else charCount val sb = new java.lang.StringBuilder(length) - val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) - appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) + val offset = appendMaskedToStringBuilder(sb, str, 0, endOfMask) + appendUnchangedToStringBuilder(sb, str, offset, length - 
endOfMask) UTF8String.fromString(sb.toString) } @@ -333,8 +333,8 @@ case class MaskLastN( val length = str.codePointCount(0, str.length()) val startOfMask = if (charCount >= length) 0 else length - charCount val sb = new java.lang.StringBuilder(length) - val offset = appendUnchangedToStringBuffer(sb, str, 0, startOfMask) - appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) + val offset = appendUnchangedToStringBuilder(sb, str, 0, startOfMask) + appendMaskedToStringBuilder(sb, str, offset, length - startOfMask) UTF8String.fromString(sb.toString) } @@ -417,8 +417,8 @@ case class MaskShowFirstN( val length = str.codePointCount(0, str.length()) val startOfMask = if (charCount > length) length else charCount val sb = new java.lang.StringBuilder(length) - val offset = appendUnchangedToStringBuffer(sb, str, 0, startOfMask) - appendMaskedToStringBuffer(sb, str, offset, length - startOfMask) + val offset = appendUnchangedToStringBuilder(sb, str, 0, startOfMask) + appendMaskedToStringBuilder(sb, str, offset, length - startOfMask) UTF8String.fromString(sb.toString) } @@ -500,8 +500,8 @@ case class MaskShowLastN( val length = str.codePointCount(0, str.length()) val endOfMask = if (charCount >= length) 0 else length - charCount val sb = new java.lang.StringBuilder(length) - val offset = appendMaskedToStringBuffer(sb, str, 0, endOfMask) - appendUnchangedToStringBuffer(sb, str, offset, length - endOfMask) + val offset = appendMaskedToStringBuilder(sb, str, 0, endOfMask) + appendUnchangedToStringBuilder(sb, str, offset, length - endOfMask) UTF8String.fromString(sb.toString) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala index ae49488a9f0bc..4d69dc32ace82 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MaskExpressionsSuite.scala @@ -97,8 +97,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("mask_last_n") { - checkEvaluation(MaskLastN(Literal("abcd-EFGH-8765"), 6, "U", "l", "#"), - "abcd-EFGU-####") + checkEvaluation(MaskLastN(Literal("abcd-EFGH-aB3d"), 6, "U", "l", "#"), + "abcd-EFGU-lU#l") checkEvaluation(new MaskLastN( Literal("abcd-EFGH-8765"), Literal(6), Literal("U"), Literal("l"), Literal("#")), "abcd-EFGU-####") @@ -141,8 +141,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("mask_show_first_n") { - checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-4321"), 6, "U", "l", "#"), - "abcd-EUUU-####-####") + checkEvaluation(MaskShowFirstN(Literal("abcd-EFGH-8765-aB3d"), 6, "U", "l", "#"), + "abcd-EUUU-####-lU#l") checkEvaluation(new MaskShowFirstN( Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")), "abcd-EUUU-####-####") @@ -184,8 +184,8 @@ class MaskExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("mask_show_last_n") { - checkEvaluation(MaskShowLastN(Literal("abcd-EFGH-8765"), 6, "U", "l", "#"), - "llll-UUUH-8765") + checkEvaluation(MaskShowLastN(Literal("aB3d-EFGH-8765"), 6, "U", "l", "#"), + "lU#l-UUUH-8765") checkEvaluation(new MaskShowLastN( Literal("abcd-EFGH-8765-4321"), Literal(6), Literal("U"), Literal("l"), Literal("#")), "llll-UUUU-###5-4321") From ca9caa0db1d8555d7def37a4897c518d683a7958 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 23 May 2018 15:37:09 +0200 Subject: [PATCH 7/8] throw exception instead of ignoring non-literals input --- .../expressions/maskExpressions.scala | 4 ++-- .../spark/sql/DataFrameFunctionsSuite.scala | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index acbd0a13bba5f..276a57266a6e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -132,14 +132,14 @@ object MaskLike { if (i == null) defaultCharCount else i.asInstanceOf[Int] case Literal(_, dt) => throw new AnalysisException("Expected literal expression of type " + s"${IntegerType.simpleString}, but got literal of ${dt.simpleString}") - case _ => defaultCharCount + case other => throw new AnalysisException(s"Expected literal expression, but got ${other.sql}") } def extractReplacement(e: Expression): String = e match { case Literal(s, StringType | NullType) => if (s == null) null else s.toString case Literal(_, dt) => throw new AnalysisException("Expected literal expression of type " + s"${StringType.simpleString}, but got literal of ${dt.simpleString}") - case _ => null + case other => throw new AnalysisException(s"Expected literal expression, but got ${other.sql}") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index beec11ea1acdf..8aad03a2d0222 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -308,7 +308,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Row(null, null, null, null))) checkAnswer(sql("select mask(null)"), Row(null)) checkAnswer(sql("select mask('AAaa11', null, null, null)"), Row("XXxxnn")) - checkAnswer(df.selectExpr("mask(a, a)"), Seq(Row("XxxxXxxxxx-nnn"), Row(""), Row(null))) + intercept[AnalysisException] { + checkAnswer(df.selectExpr("mask(a, a)"), Seq(Row("XxxxXxxxxx-nnn"), Row(""), Row(null))) + } checkAnswer( df.selectExpr( 
@@ -323,7 +325,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Row(null, null, null, null, null))) checkAnswer(sql("select mask_first_n(null)"), Row(null)) checkAnswer(sql("select mask_first_n('A1aA1a', null, null, null, null)"), Row("XnxX1a")) - checkAnswer(spark.range(1).selectExpr("mask_first_n('A1aA1a', id)"), Row("XnxX1a")) + intercept[AnalysisException] { + checkAnswer(spark.range(1).selectExpr("mask_first_n('A1aA1a', id)"), Row("XnxX1a")) + } checkAnswer( df.selectExpr( @@ -338,7 +342,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Row(null, null, null, null, null))) checkAnswer(sql("select mask_last_n(null)"), Row(null)) checkAnswer(sql("select mask_last_n('A1aA1a', null, null, null, null)"), Row("A1xXnx")) - checkAnswer(spark.range(1).selectExpr("mask_last_n('A1aA1a', id)"), Row("A1xXnx")) + intercept[AnalysisException] { + checkAnswer(spark.range(1).selectExpr("mask_last_n('A1aA1a', id)"), Row("A1xXnx")) + } checkAnswer( df.selectExpr( @@ -353,7 +359,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Row(null, null, null, null, null))) checkAnswer(sql("select mask_show_first_n(null)"), Row(null)) checkAnswer(sql("select mask_show_first_n('A1aA1a', null, null, null, null)"), Row("A1aAnx")) - checkAnswer(spark.range(1).selectExpr("mask_show_first_n('A1aA1a', id)"), Row("A1aAnx")) + intercept[AnalysisException] { + checkAnswer(spark.range(1).selectExpr("mask_show_first_n('A1aA1a', id)"), Row("A1aAnx")) + } checkAnswer( df.selectExpr( @@ -368,7 +376,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Row(null, null, null, null, null))) checkAnswer(sql("select mask_show_last_n(null)"), Row(null)) checkAnswer(sql("select mask_show_last_n('A1aA1a', null, null, null, null)"), Row("XnaA1a")) - checkAnswer(spark.range(1).selectExpr("mask_show_last_n('A1aA1a', id)"), Row("XnaA1a")) + intercept[AnalysisException] { + 
checkAnswer(spark.range(1).selectExpr("mask_show_last_n('A1aA1a', id)"), Row("XnaA1a")) + } checkAnswer(sql("select mask_hash(null)"), Row(null)) } From 6fd8f2fbd37e5193f0ffb1a25a8f4a8c71ab55bd Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Thu, 24 May 2018 10:15:56 +0200 Subject: [PATCH 8/8] fix javastyle --- .../spark/sql/catalyst/expressions/MaskExpressionsUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java index 0caf0054b75ca..05879902a4ed9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/MaskExpressionsUtils.java @@ -21,7 +21,7 @@ * Contains all the Utils methods used in the masking expressions. */ public class MaskExpressionsUtils { - final static int UNMASKED_VAL = -1; + static final int UNMASKED_VAL = -1; /** * Returns the masking character for {@param c} or {@param c} is it should not be masked.