Skip to content

Commit

Permalink
[SPARK-48693][SQL] Simplify and unify toString of Invoke and StaticIn…
Browse files Browse the repository at this point in the history
…voke

### What changes were proposed in this pull request?

The `StaticInvoke` class is used extensively by `RuntimeReplaceable` expressions. Because of its ugly string representation, a plan with multiple or nested `StaticInvoke` expressions is hard to read.

This PR overrides `StaticInvoke`'s toString method to improve its readability.

```diff
 Project [left(c7#x, 2) AS left(c7, 2)#x, left(c8#x, 2) AS left(c8, 2)#x, left(v#x, 3) AS left(v, 3)#x, left(s#x, 2) AS left(s, 2)#x]
 +- SubqueryAlias spark_catalog.default.char_tbl4
-   +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, tru
e, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#
x, 8, true, false, true) AS c8#x, v#x, s#x]
+   +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSideP
adding(c8#x, 8)) AS c8#x, v#x, s#x]
```

In contrast, `Invoke`'s toString is overly simple and loses the string representations of its children.

### Why are the changes needed?

improve plan readability and consistency

### Does this PR introduce _any_ user-facing change?

Yes, the string representation of a plan containing `StaticInvoke` or `Invoke` will change.

### How was this patch tested?

existing modified tests

### Was this patch authored or co-authored using generative AI tooling?

no

Closes apache#47066 from yaooqinn/SPARK-48693.

Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
  • Loading branch information
yaooqinn committed Jun 25, 2024
1 parent 51f1103 commit 8c4ca7e
Show file tree
Hide file tree
Showing 32 changed files with 74 additions and 64 deletions.
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true) AS aes_decrypt(g, g, GCM, DEFAULT, )#0]
Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary))) AS aes_decrypt(g, g, GCM, DEFAULT, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true) AS aes_decrypt(g, g, g, DEFAULT, )#0]
Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary))) AS aes_decrypt(g, g, g, DEFAULT, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, )#0]
Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary))) AS aes_decrypt(g, g, g, g, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, g)#0]
Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary))) AS aes_decrypt(g, g, g, g, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0]
Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, DEFAULT, , )#0]
Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, g, DEFAULT, , )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, , )#0]
Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, g, g, , )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, X'434445', )#0]
Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast( as binary))) AS aes_encrypt(g, g, g, g, X'434445', )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast(g#0 as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, X'434445', g)#0]
Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast(g#0 as binary))) AS aes_encrypt(g, g, g, g, X'434445', g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapBitPosition, id#0L, LongType, true, false, true) AS bitmap_bit_position(id)#0L]
Project [static_invoke(BitmapExpressionUtils.bitmapBitPosition(id#0L)) AS bitmap_bit_position(id)#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapBitPosition, id#0L, LongType, true, false, true) AS bitmap_bit_position(id)#0L]
Project [static_invoke(BitmapExpressionUtils.bitmapBitPosition(id#0L)) AS bitmap_bit_position(id)#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapCount, bytes#0, BinaryType, true, false, true) AS bitmap_count(bytes)#0L]
Project [static_invoke(BitmapExpressionUtils.bitmapCount(bytes#0)) AS bitmap_count(bytes)#0L]
+- LocalRelation <empty>, [id#0L, bytes#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.StringDecode, StringType, decode, cast(g#0 as binary), UTF-8, false, false, BinaryType, StringTypeAnyCollation, BooleanType, BooleanType, true, true, true) AS decode(g, UTF-8)#0]
Project [static_invoke(StringDecode.decode(cast(g#0 as binary), UTF-8, false, false)) AS decode(g, UTF-8)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.Encode, BinaryType, encode, g#0, UTF-8, false, false, StringTypeAnyCollation, StringTypeAnyCollation, BooleanType, BooleanType, true, true, true) AS encode(g, UTF-8)#0]
Project [static_invoke(Encode.encode(g#0, UTF-8, false, false)) AS encode(g, UTF-8)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, BooleanType, isVariantNull, staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true), VariantType, false, false, true) AS is_variant_null(parse_json(g))#0]
Project [static_invoke(VariantExpressionEvalUtils.isVariantNull(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)))) AS is_variant_null(parse_json(g))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.unsafe.types.ByteArray, BinaryType, lpad, bytes#0, 5, 0x0C0A0F0E, BinaryType, IntegerType, BinaryType, true, false, true) AS lpad(bytes, 5, X'0C0A0F0E')#0]
Project [static_invoke(ByteArray.lpad(bytes#0, 5, 0x0C0A0F0E)) AS lpad(bytes, 5, X'0C0A0F0E')#0]
+- LocalRelation <empty>, [id#0L, bytes#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true) AS parse_json(g)#0]
Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)) AS parse_json(g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.unsafe.types.ByteArray, BinaryType, rpad, bytes#0, 5, 0x0B0A0B0E, BinaryType, IntegerType, BinaryType, true, false, true) AS rpad(bytes, 5, X'0B0A0B0E')#0]
Project [static_invoke(ByteArray.rpad(bytes#0, 5, 0x0B0A0B0E)) AS rpad(bytes, 5, X'0B0A0B0E')#0]
+- LocalRelation <empty>, [id#0L, bytes#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariant$, StringType, schemaOfVariant, staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true), VariantType, true, false, true) AS schema_of_variant(parse_json(g))#0]
Project [static_invoke(SchemaOfVariant.schemaOfVariant(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)))) AS schema_of_variant(parse_json(g))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Aggregate [schema_of_variant_agg(staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true), 0, 0) AS schema_of_variant_agg(parse_json(g))#0]
Aggregate [schema_of_variant_agg(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), 0, 0) AS schema_of_variant_agg(parse_json(g))#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.Encode, BinaryType, encode, g#0, UTF-8, false, false, StringTypeAnyCollation, StringTypeAnyCollation, BooleanType, BooleanType, true, true, true) AS to_binary(g, utf-8)#0]
Project [static_invoke(Encode.encode(g#0, UTF-8, false, false)) AS to_binary(g, utf-8)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, GCM, DEFAULT, )#0]
Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary)))) AS try_aes_decrypt(g, g, GCM, DEFAULT, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, DEFAULT, )#0]
Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary)))) AS try_aes_decrypt(g, g, g, DEFAULT, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, g, )#0]
Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary)))) AS try_aes_decrypt(g, g, g, g, )#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, g, g)#0]
Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary)))) AS try_aes_decrypt(g, g, g, g, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, false, StringTypeAnyCollation, BooleanType, true, true, true) AS try_parse_json(g)#0]
Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false)) AS try_parse_json(g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [try_variant_get(staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true), $, IntegerType, false, Some(America/Los_Angeles)) AS try_variant_get(parse_json(g), $)#0]
Project [try_variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), $, IntegerType, false, Some(America/Los_Angeles)) AS try_variant_get(parse_json(g), $)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, decode, g#0, UTF-8, StringTypeAnyCollation, StringTypeAnyCollation, true, true, true) AS url_decode(g)#0]
Project [static_invoke(UrlCodec.decode(g#0, UTF-8)) AS url_decode(g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, encode, g#0, UTF-8, StringTypeAnyCollation, StringTypeAnyCollation, true, true, true) AS url_encode(g)#0]
Project [static_invoke(UrlCodec.encode(g#0, UTF-8)) AS url_encode(g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [variant_get(staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, true, StringTypeAnyCollation, BooleanType, true, false, true), $, IntegerType, true, Some(America/Los_Angeles)) AS variant_get(parse_json(g), $)#0]
Project [variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), $, IntegerType, true, Some(America/Los_Angeles)) AS variant_get(parse_json(g), $)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ case class StaticInvoke(
super.stringArgs.toSeq.dropRight(1).iterator
}
}

/**
 * Compact, human-readable rendering of the form
 * `static_invoke(Owner.functionName(arg1, arg2, ...))`.
 *
 * When `objectName` starts with "org.apache.spark.", the owner is shown as the
 * simple name of `cls`; otherwise `objectName` is shown unchanged.
 */
override def toString: String = {
  val owner =
    if (objectName.startsWith("org.apache.spark.")) cls.getSimpleName else objectName
  val renderedArgs = arguments.mkString(", ")
  s"static_invoke($owner.$functionName($renderedArgs))"
}
}

/**
Expand Down Expand Up @@ -509,7 +518,8 @@ case class Invoke(
ev.copy(code = code)
}

// Renders only the target object and the method name; the call arguments are not part of the output.
override def toString: String = s"$targetObject.$functionName"
/**
 * Renders the call as `invoke(target.functionName(arg1, arg2, ...))`, with the
 * arguments joined by ", ".
 */
override def toString: String = {
  val renderedArgs = arguments.mkString(", ")
  s"invoke($targetObject.$functionName($renderedArgs))"
}

/**
 * Rebuilds this expression from a new child sequence: the first child becomes
 * `targetObject` and the remaining children become `arguments`.
 */
override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Invoke = {
  val newTarget = newChildren.head
  val newArguments = newChildren.tail
  copy(targetObject = newTarget, arguments = newArguments)
}
Expand Down
Loading

0 comments on commit 8c4ca7e

Please sign in to comment.