From 8ef294a2f57427b44f4b4d4eebc72173017ea2f0 Mon Sep 17 00:00:00 2001 From: yangpeng Date: Fri, 11 Aug 2023 11:33:00 +0800 Subject: [PATCH 1/4] :bug: Add null check in text deserialization This commit adds a null check while splitting the kv in the TextDeserializationSchema. Not having this check could potentially lead to a crash if 'kvs' has fewer than two elements. Now, the function will return null in this case, significantly improving reliability. --- .../format/text/TextDeserializationSchema.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java index 86f25a69b51..365bcf8faf0 100644 --- a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java @@ -186,10 +186,14 @@ private Object convert(String field, SeaTunnelDataType fieldType, int level) LinkedHashMap objectMap = new LinkedHashMap<>(); String[] kvs = field.split(separators[level + 1]); for (String kv : kvs) { - String[] splits = kv.split(separators[level + 2]); - objectMap.put( - convert(splits[0], keyType, level + 1), - convert(splits[1], valueType, level + 1)); + if (kvs.length < 2) { + return null; + } else { + String[] splits = kv.split(separators[level + 2]); + objectMap.put( + convert(splits[0], keyType, level + 1), + convert(splits[1], valueType, level + 1)); + } } return objectMap; case STRING: From 88d8962a23ff3effa44f1f0b144752f028c575f5 Mon Sep 17 00:00:00 2001 From: yangpeng Date: Fri, 11 Aug 2023 16:49:57 +0800 Subject: [PATCH 2/4] :bug: Refactor text deserialization, allow null value assignments Modified the TextDeserializationSchema to allow
key-value pairs with null values. This adjustment helps to handle data records with missing values more accurately. The code was also reformatted for improved readability. --- .../seatunnel/format/text/TextDeserializationSchema.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java index 365bcf8faf0..d2ddc37f3d8 100644 --- a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java @@ -73,7 +73,8 @@ public static class Builder { DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS; private TimeUtils.Formatter timeFormatter = TimeUtils.Formatter.HH_MM_SS; - private Builder() {} + private Builder() { + } public Builder seaTunnelRowType(SeaTunnelRowType seaTunnelRowType) { this.seaTunnelRowType = seaTunnelRowType; @@ -186,10 +187,10 @@ private Object convert(String field, SeaTunnelDataType fieldType, int level) LinkedHashMap objectMap = new LinkedHashMap<>(); String[] kvs = field.split(separators[level + 1]); for (String kv : kvs) { - if (kvs.length < 2) { - return null; + String[] splits = kv.split(separators[level + 2]); + if (splits.length < 2) { + objectMap.put(convert(splits[0], keyType, level + 1), null); } else { - String[] splits = kv.split(separators[level + 2]); objectMap.put( convert(splits[0], keyType, level + 1), convert(splits[1], valueType, level + 1)); From 50ac7a8421768a584327b288b17dc71ff1e1682e Mon Sep 17 00:00:00 2001 From: yangpeng Date: Fri, 11 Aug 2023 16:53:55 +0800 Subject: [PATCH 3/4] :art: Refactor Builder constructor in TextDeserializationSchema Tidied up the Builder constructor in TextDeserializationSchema by 
removing unnecessary empty lines. This change improves code readability and consistency with the rest of the class. --- .../seatunnel/format/text/TextDeserializationSchema.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java index d2ddc37f3d8..1ec0499fb5e 100644 --- a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java @@ -73,8 +73,7 @@ public static class Builder { DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS; private TimeUtils.Formatter timeFormatter = TimeUtils.Formatter.HH_MM_SS; - private Builder() { - } + private Builder() {} public Builder seaTunnelRowType(SeaTunnelRowType seaTunnelRowType) { this.seaTunnelRowType = seaTunnelRowType; From a2ed07da65ba04dd6d76931d08fdf9137daced7d Mon Sep 17 00:00:00 2001 From: yangpeng Date: Fri, 11 Aug 2023 17:36:48 +0800 Subject: [PATCH 4/4] :white_check_mark: Add test case where the value is empty and the key is empty in the map --- .../seatunnel/format/text/TextFormatSchemaTest.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java index 7d904e2c8fc..a9068913983 100644 --- a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java @@ -45,7 +45,14 @@ public class
TextFormatSchemaTest { + '\002' + "Kris" + '\003' - + "21\001" + + "21" + + '\002' + + "nullValueKey" + + '\003' + + '\002' + + '\003' + + "1231" + + "\001" + "tyrantlucifer\001" + "true\001" + "1\001"