Skip to content

Commit

Permalink
DBZ-8694 Set tiny/medium/long binary collation schema to string
Browse files Browse the repository at this point in the history
  • Loading branch information
twthorn committed Feb 13, 2025
1 parent 8bc8daf commit c257065
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/main/java/io/debezium/connector/vitess/VitessType.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public static VitessType resolve(Query.Field field, boolean isEnumSetStringValue
case "INT64":
return new VitessType(type, Types.BIGINT);
case "BLOB":
if (VitessValueConverter.matches(field.getColumnType().toUpperCase(), "TEXT")) {
if (matchAny(field, List.of("TINYTEXT", "TEXT", "MEDIUMTEXT", "LONGTEXT"))) {
return new VitessType(type, Types.VARCHAR);
}
return new VitessType(type, Types.BLOB);
Expand Down Expand Up @@ -154,6 +154,11 @@ else if (VitessValueConverter.matches(field.getColumnType().toUpperCase(), "SET"
}
}

/**
 * Tells whether the column type of {@code field} matches at least one of the given type names.
 *
 * <p>The column type is upper-cased once, using {@link java.util.Locale#ROOT} so the result does
 * not depend on the JVM default locale (e.g. under a Turkish locale {@code "tinytext"} would
 * otherwise upper-case to a dotted capital I and stop matching ASCII type names). Each candidate
 * is then tested with {@code VitessValueConverter.matches}.
 *
 * @param field the query field whose column type is inspected
 * @param types candidate type names; callers in this file pass upper-case names such as {@code "TEXT"}
 * @return {@code true} if any candidate matches; {@code false} otherwise, including for an empty list
 */
private static boolean matchAny(Query.Field field, List<String> types) {
    // Locale.ROOT keeps the casing stable regardless of where the connector runs.
    String upperCaseType = field.getColumnType().toUpperCase(java.util.Locale.ROOT);
    // anyMatch short-circuits on the first hit, same as filter/findAny/isPresent but more direct.
    return types.stream().anyMatch(type -> VitessValueConverter.matches(upperCaseType, type));
}

private static VitessType getEnumOrSetVitessType(boolean isEnumSetStringValue, String type, Query.Field field) {
int jdbcType;
if (type.equals("ENUM") && !isEnumSetStringValue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,21 @@ public abstract class AbstractVitessConnectorTest extends AbstractAsyncEngineCon
"char_col," +
"binary_ascii_collate_ascii_bin_col," +
"varbinary_col," +
"tinytext_ascii_collate_ascii_bin_col," +
"tinytext_col," +
"text_ascii_collate_ascii_bin_col," +
"text_col," +
"mediumtext_ascii_collate_ascii_bin_col," +
"mediumtext_col," +
"longtext_ascii_collate_ascii_bin_col," +
"longtext_col," +
"blob_ascii_collate_ascii_bin_col," +
"enum_ascii_collate_ascii_bin_col," +
"enum_col," +
"set_ascii_collate_ascii_bin_col," +
"set_col" +
") " +
"VALUES (\"foo\", \"foo\", \"foobarfoo\", \"foobarfoo\", \"foobarfoo\", \"foo\", \"foo\", \"foo\", \"foo\", \"small\", \"small\", \"a\", \"a\");";
"VALUES (\"foo\", \"foo\", \"foobarfoo\", \"foobarfoo\", \"foobarfoo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"small\", \"small\", \"a\", \"a\");";
protected static final String INSERT_BYTES_TYPES_STMT = "INSERT INTO string_table ("
+ "binary_col,"
+ "varbinary_col,"
Expand Down Expand Up @@ -217,8 +223,14 @@ protected List<SchemaAndValueField> schemasAndValuesForCharSetCollateTypes() {
new SchemaAndValueField("char_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foobarfoo"),
new SchemaAndValueField("binary_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_BYTES_SCHEMA, ByteBuffer.wrap("foobarfoo".getBytes())),
new SchemaAndValueField("varbinary_col", SchemaBuilder.OPTIONAL_BYTES_SCHEMA, ByteBuffer.wrap("foo".getBytes())),
new SchemaAndValueField("tinytext_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("tinytext_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("text_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("text_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("mediumtext_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("mediumtext_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("longtext_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("longtext_col", SchemaBuilder.OPTIONAL_STRING_SCHEMA, "foo"),
new SchemaAndValueField("blob_ascii_collate_ascii_bin_col", SchemaBuilder.OPTIONAL_BYTES_SCHEMA, ByteBuffer.wrap("foo".getBytes())),
new SchemaAndValueField("enum_ascii_collate_ascii_bin_col", io.debezium.data.Enum.builder("small,medium,large").optional().build(), "small"),
new SchemaAndValueField("enum_col", io.debezium.data.Enum.builder("small,medium,large").optional().build(), "small"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public void shouldResolveVitessTypeWhereColumnTypeDiffers() {
Query.Field varBinary = Query.Field.newBuilder().setType(Query.Type.VARBINARY).setColumnType("varbinary(9)").build();
assertThat(VitessType.resolve(varBinary).getJdbcId()).isEqualTo(Types.BINARY);
Query.Field textCollateBinary = Query.Field.newBuilder().setType(Query.Type.BLOB).setColumnType("text").build();
assertThat(VitessType.resolve(textCollateBinary).getJdbcId()).isEqualTo(Types.VARCHAR); // Why not
assertThat(VitessType.resolve(textCollateBinary).getJdbcId()).isEqualTo(Types.VARCHAR);
Query.Field mediumtextCollateBinary = Query.Field.newBuilder().setType(Query.Type.BLOB).setColumnType("mediumtext").build();
assertThat(VitessType.resolve(mediumtextCollateBinary).getJdbcId()).isEqualTo(Types.VARCHAR);
}

@Test
Expand Down
6 changes: 6 additions & 0 deletions src/test/resources/vitess_create_tables.ddl
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,14 @@ CREATE TABLE character_set_collate_table
`char_col` CHAR(9),
`binary_ascii_collate_ascii_bin_col` BINARY(9), -- character set & collation are binary by default
`varbinary_col` VARBINARY(32), -- character set & collation are binary by default
`tinytext_ascii_collate_ascii_bin_col` TINYTEXT CHARACTER SET ascii COLLATE ascii_bin,
`tinytext_col` TINYTEXT,
`text_ascii_collate_ascii_bin_col` TEXT CHARACTER SET ascii COLLATE ascii_bin,
`text_col` TEXT,
`mediumtext_ascii_collate_ascii_bin_col` MEDIUMTEXT CHARACTER SET ascii COLLATE ascii_bin,
`mediumtext_col` MEDIUMTEXT,
`longtext_ascii_collate_ascii_bin_col` LONGTEXT CHARACTER SET ascii COLLATE ascii_bin,
`longtext_col` LONGTEXT,
`blob_ascii_collate_ascii_bin_col` BLOB, -- character set & collation are binary by default
`enum_ascii_collate_ascii_bin_col` ENUM('small', 'medium', 'large') CHARACTER SET ascii COLLATE ascii_bin DEFAULT 'medium',
`enum_col` ENUM('small', 'medium', 'large') DEFAULT 'medium',
Expand Down

0 comments on commit c257065

Please sign in to comment.