diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java index ef5bc52aaee7a..8420512624368 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java @@ -7,7 +7,6 @@ import com.linkedin.util.Pair; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; @@ -22,8 +21,7 @@ public class ProtobufUtils { private ProtobufUtils() {} public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Location location) { - String orig = - Stream.concat( + return Stream.concat( location.getLeadingDetachedCommentsList().stream(), Stream.of(location.getLeadingComments(), location.getTrailingComments())) .filter(Objects::nonNull) @@ -31,14 +29,6 @@ public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Lo .map(line -> line.replaceFirst("^[*/ ]+", "")) .collect(Collectors.joining("\n")) .trim(); - - /* - * Sometimes DataHub doesn't like these strings. Not sure if its DataHub - * or protobuf issue: https://github.com/protocolbuffers/protobuf/issues/4691 - * - * We essentially smash utf8 chars to ascii here - */ - return new String(orig.getBytes(StandardCharsets.ISO_8859_1)); } /* diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java index c3ede2e62c314..836c036c4dc35 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java @@ -1,5 +1,6 @@ package datahub.protobuf.model; +import com.google.protobuf.DescriptorProtos; import com.google.protobuf.DescriptorProtos.DescriptorProto; import com.google.protobuf.DescriptorProtos.FieldDescriptorProto; import com.google.protobuf.DescriptorProtos.FileDescriptorProto; @@ -18,14 +19,19 @@ import datahub.protobuf.ProtobufUtils; import datahub.protobuf.visitors.ProtobufModelVisitor; import datahub.protobuf.visitors.VisitContext; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; + import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; + @Builder(toBuilder = true) @Getter @@ -81,78 +87,72 @@ public int getNumber() { @Override public String nativeType() { - return Optional.ofNullable(nativeType) - .orElseGet( - () -> { - if (fieldProto.getTypeName().isEmpty()) { - return fieldProto.getType().name().split("_")[1].toLowerCase(); - } else { - return fieldProto.getTypeName().replaceFirst("^[.]", ""); - } - }); + return Optional.ofNullable(nativeType).orElseGet(() -> { + if (fieldProto.getTypeName().isEmpty()) { + return fieldProto.getType().name().split("_")[1].toLowerCase(); + } else { + return fieldProto.getTypeName().replaceFirst("^[.]", ""); + } + }); } @Override public String fieldPathType() { - return Optional.ofNullable(fieldPathType) - .orElseGet( - () -> { - final String pathType; - - switch (fieldProto.getType()) { - case TYPE_DOUBLE: - pathType = "double"; - break; - case TYPE_FLOAT: - pathType = "float"; - break; - case TYPE_SFIXED64: - case TYPE_FIXED64: - case TYPE_UINT64: - case TYPE_INT64: - case TYPE_SINT64: - pathType = "long"; - break; - case TYPE_FIXED32: - case TYPE_SFIXED32: - case TYPE_INT32: - case TYPE_UINT32: - case TYPE_SINT32: - pathType = "int"; - break; - case TYPE_BYTES: - pathType = "bytes"; - break; - case TYPE_ENUM: - pathType = "enum"; - break; - case TYPE_BOOL: - pathType = "boolean"; - break; - case TYPE_STRING: - pathType = "string"; - break; - case TYPE_GROUP: - case TYPE_MESSAGE: - pathType = nativeType().replace(".", "_"); - break; - default: - throw new IllegalStateException( - String.format( - "Unexpected FieldDescriptorProto => FieldPathType %s", - fieldProto.getType())); - } - - StringArray fieldPath = new StringArray(); - - if (schemaFieldDataType().getType().isArrayType()) { - fieldPath.add("[type=array]"); - } - - fieldPath.add(String.format("[type=%s]", pathType)); - - return String.join(".", fieldPath); - }); + return Optional.ofNullable(fieldPathType).orElseGet(() -> { + final String pathType; + + switch (fieldProto.getType()) { + case TYPE_DOUBLE: + pathType = "double"; + break; + case TYPE_FLOAT: + pathType = "float"; + break; + case TYPE_SFIXED64: + case TYPE_FIXED64: + case TYPE_UINT64: + case TYPE_INT64: + case TYPE_SINT64: + pathType = "long"; + break; + case TYPE_FIXED32: + case TYPE_SFIXED32: + case TYPE_INT32: + case TYPE_UINT32: + case TYPE_SINT32: + pathType = "int"; + break; + case TYPE_BYTES: + pathType = "bytes"; + break; + case TYPE_ENUM: + pathType = "enum"; + break; + case TYPE_BOOL: + pathType = "boolean"; + break; + case TYPE_STRING: + pathType = "string"; + break; + case TYPE_GROUP: + case TYPE_MESSAGE: + pathType = nativeType().replace(".", "_"); + break; + default: + throw new IllegalStateException( + String.format("Unexpected FieldDescriptorProto => FieldPathType %s", fieldProto.getType())); + } + + StringArray fieldPath = new StringArray(); + + if (schemaFieldDataType().getType().isArrayType()) { + fieldPath.add("[type=array]"); + } + + fieldPath.add(String.format("[type=%s]", pathType)); + + return String.join(".", fieldPath); + }); } public boolean isMessage() { @@ -165,110 +165,92 @@ public int sortWeight() { } public SchemaFieldDataType schemaFieldDataType() throws IllegalStateException { - return Optional.ofNullable(schemaFieldDataType) - .orElseGet( - () -> { - final SchemaFieldDataType.Type fieldType; - - switch (fieldProto.getType()) { - case TYPE_DOUBLE: - case TYPE_FLOAT: - case TYPE_INT64: - case TYPE_UINT64: - case TYPE_INT32: - case TYPE_UINT32: - case TYPE_SINT32: - case TYPE_SINT64: - fieldType = SchemaFieldDataType.Type.create(new NumberType()); - break; - case TYPE_GROUP: - case TYPE_MESSAGE: - fieldType = SchemaFieldDataType.Type.create(new RecordType()); - break; - case TYPE_BYTES: - fieldType = SchemaFieldDataType.Type.create(new BytesType()); - break; - case TYPE_ENUM: - fieldType = SchemaFieldDataType.Type.create(new EnumType()); - break; - case TYPE_BOOL: - fieldType = SchemaFieldDataType.Type.create(new BooleanType()); - break; - case TYPE_STRING: - fieldType = SchemaFieldDataType.Type.create(new StringType()); - break; - case TYPE_FIXED64: - case TYPE_FIXED32: - case TYPE_SFIXED32: - case TYPE_SFIXED64: - fieldType = SchemaFieldDataType.Type.create(new FixedType()); - break; - default: - throw new IllegalStateException( - String.format( - "Unexpected FieldDescriptorProto => SchemaFieldDataType: %s", - fieldProto.getType())); - } - - if (fieldProto.getLabel().equals(FieldDescriptorProto.Label.LABEL_REPEATED)) { - return new SchemaFieldDataType() - .setType( - SchemaFieldDataType.Type.create( - new ArrayType().setNestedType(new StringArray()))); - } - - return new SchemaFieldDataType().setType(fieldType); - }); + return Optional.ofNullable(schemaFieldDataType).orElseGet(() -> { + final SchemaFieldDataType.Type fieldType; + + switch (fieldProto.getType()) { + case TYPE_DOUBLE: + case TYPE_FLOAT: + case TYPE_INT64: + case TYPE_UINT64: + case TYPE_INT32: + case TYPE_UINT32: + case TYPE_SINT32: + case TYPE_SINT64: + fieldType = SchemaFieldDataType.Type.create(new NumberType()); + break; + case TYPE_GROUP: + case TYPE_MESSAGE: + fieldType = SchemaFieldDataType.Type.create(new RecordType()); + break; + case TYPE_BYTES: + fieldType = SchemaFieldDataType.Type.create(new BytesType()); + break; + case TYPE_ENUM: + fieldType = SchemaFieldDataType.Type.create(new EnumType()); + break; + case TYPE_BOOL: + fieldType = SchemaFieldDataType.Type.create(new BooleanType()); + break; + case TYPE_STRING: + fieldType = SchemaFieldDataType.Type.create(new StringType()); + break; + case TYPE_FIXED64: + case TYPE_FIXED32: + case TYPE_SFIXED32: + case TYPE_SFIXED64: + fieldType = SchemaFieldDataType.Type.create(new FixedType()); + break; + default: + throw new IllegalStateException( + String.format("Unexpected FieldDescriptorProto => SchemaFieldDataType: %s", fieldProto.getType())); + } + + if (fieldProto.getLabel().equals(FieldDescriptorProto.Label.LABEL_REPEATED)) { + return new SchemaFieldDataType().setType( + SchemaFieldDataType.Type.create(new ArrayType().setNestedType(new StringArray()))); + } + + return new SchemaFieldDataType().setType(fieldType); + }); } @Override public Stream messageLocations() { List fileLocations = fileProto().getSourceCodeInfo().getLocationList(); return fileLocations.stream() - .filter( - loc -> - loc.getPathCount() > 1 - && loc.getPath(0) == FileDescriptorProto.MESSAGE_TYPE_FIELD_NUMBER); + .filter(loc -> loc.getPathCount() > 1 && loc.getPath(0) == FileDescriptorProto.MESSAGE_TYPE_FIELD_NUMBER); } @Override public String comment() { - return messageLocations() - .filter(location -> location.getPathCount() > 3) - .filter( - location -> - !ProtobufUtils.collapseLocationComments(location).isEmpty() - && !isEnumType(location.getPathList())) - .filter( - location -> { - List pathList = location.getPathList(); - DescriptorProto messageType = fileProto().getMessageType(pathList.get(1)); - - if (!isNestedType - && location.getPath(2) == DescriptorProto.FIELD_FIELD_NUMBER - && fieldProto == messageType.getField(location.getPath(3))) { - return true; - } else if (isNestedType - && location.getPath(2) == DescriptorProto.NESTED_TYPE_FIELD_NUMBER - && fieldProto == getNestedTypeFields(pathList, messageType)) { - return true; - } - return false; - }) + return messageLocations().filter(location -> location.getPathCount() > 3) + .filter(location -> !ProtobufUtils.collapseLocationComments(location).isEmpty() && !isEnumType( + location.getPathList())) + .filter(location -> { + List pathList = location.getPathList(); + DescriptorProto messageType = fileProto().getMessageType(pathList.get(1)); + + if (!isNestedType && location.getPath(2) == DescriptorProto.FIELD_FIELD_NUMBER + && fieldProto == messageType.getField(location.getPath(3))) { + return true; + } else if (isNestedType && location.getPath(2) == DescriptorProto.NESTED_TYPE_FIELD_NUMBER + && fieldProto == getNestedTypeFields(pathList, messageType)) { + return true; + } + return false; + }) .map(ProtobufUtils::collapseLocationComments) .collect(Collectors.joining("\n")) .trim(); } - private FieldDescriptorProto getNestedTypeFields( - List pathList, DescriptorProto messageType) { + private FieldDescriptorProto getNestedTypeFields(List pathList, DescriptorProto messageType) { int pathSize = pathList.size(); List nestedValues = new ArrayList<>(pathSize); for (int index = 0; index < pathSize; index++) { - if (index > 1 - && index % 2 == 0 - && pathList.get(index) == DescriptorProto.NESTED_TYPE_FIELD_NUMBER) { + if (index > 1 && index % 2 == 0 && pathList.get(index) == DescriptorProto.NESTED_TYPE_FIELD_NUMBER) { nestedValues.add(pathList.get(index + 1)); } } @@ -278,9 +260,7 @@ private FieldDescriptorProto getNestedTypeFields( } int fieldIndex = pathList.get(pathList.size() - 1); - if (isFieldPath(pathList) - && pathSize % 2 == 0 - && fieldIndex < messageType.getFieldList().size()) { + if (isFieldPath(pathList) && pathSize % 2 == 0 && fieldIndex < messageType.getFieldList().size()) { return messageType.getField(fieldIndex); } @@ -293,9 +273,7 @@ private boolean isFieldPath(List pathList) { private boolean isEnumType(List pathList) { for (int index = 0; index < pathList.size(); index++) { - if (index > 1 - && index % 2 == 0 - && pathList.get(index) == DescriptorProto.ENUM_TYPE_FIELD_NUMBER) { + if (index > 1 && index % 2 == 0 && pathList.get(index) == DescriptorProto.ENUM_TYPE_FIELD_NUMBER) { return true; } } @@ -330,4 +308,63 @@ public boolean equals(Object o) { public int hashCode() { return fullName().hashCode(); } + + public boolean isEnum() { + return getFieldProto().getType() == DescriptorProtos.FieldDescriptorProto.Type.TYPE_ENUM; + } + + public Optional getEnumDescriptor() { + if (!isEnum()) { + return Optional.empty(); + } + + String enumTypeName = getFieldProto().getTypeName(); + String shortEnumTypeName = enumTypeName.substring(enumTypeName.lastIndexOf('.') + 1); + + return getProtobufMessage().fileProto().getEnumTypeList().stream() + .filter(enumType -> enumType.getName().equals(shortEnumTypeName)) + .findFirst(); + } + + public List getEnumValues() { + return getEnumDescriptor().map(DescriptorProtos.EnumDescriptorProto::getValueList).orElse(Collections.emptyList()); + } + + public Map getEnumValuesWithComments() { + Optional enumProtoOpt = getEnumDescriptor(); + if (enumProtoOpt.isEmpty()) { + return Collections.emptyMap(); + } + + DescriptorProtos.EnumDescriptorProto enumProto = enumProtoOpt.get(); + Map valueComments = new LinkedHashMap<>(); + List values = enumProto.getValueList(); + List locations = + getProtobufMessage().fileProto().getSourceCodeInfo().getLocationList(); + + int enumIndex = getProtobufMessage().fileProto().getEnumTypeList().indexOf(enumProto); + + for (int i = 0; i < values.size(); i++) { + DescriptorProtos.EnumValueDescriptorProto value = values.get(i); + int finalI = i; + String comment = locations.stream() + .filter(loc -> isEnumValueLocation(loc, enumIndex, finalI)) + .findFirst() + .map(ProtobufUtils::collapseLocationComments) + .orElse(""); + + valueComments.put(value.getName(), comment); + } + + return valueComments; + } + + private boolean isEnumValueLocation(DescriptorProtos.SourceCodeInfo.Location location, int enumIndex, + int valueIndex) { + return location.getPathCount() > 3 + && location.getPath(0) == DescriptorProtos.FileDescriptorProto.ENUM_TYPE_FIELD_NUMBER + && location.getPath(1) == enumIndex + && location.getPath(2) == DescriptorProtos.EnumDescriptorProto.VALUE_FIELD_NUMBER + && location.getPath(3) == valueIndex; + } } diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java index 240cf7b6d168b..fb1df90b9c5b0 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java @@ -1,8 +1,5 @@ package datahub.protobuf.visitors.field; -import static datahub.protobuf.ProtobufUtils.getFieldOptions; -import static datahub.protobuf.ProtobufUtils.getMessageOptions; - import com.linkedin.common.GlobalTags; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.GlossaryTermAssociationArray; @@ -13,60 +10,107 @@ import com.linkedin.schema.SchemaField; import com.linkedin.tag.TagProperties; import com.linkedin.util.Pair; +import datahub.protobuf.model.FieldTypeEdge; +import datahub.protobuf.model.ProtobufElement; import datahub.protobuf.model.ProtobufField; import datahub.protobuf.visitors.ProtobufExtensionUtil; import datahub.protobuf.visitors.VisitContext; +import org.jgrapht.GraphPath; + import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; +import static datahub.protobuf.ProtobufUtils.getFieldOptions; +import static datahub.protobuf.ProtobufUtils.getMessageOptions; + + public class ProtobufExtensionFieldVisitor extends SchemaFieldVisitor { @Override public Stream> visitField(ProtobufField field, VisitContext context) { - boolean isPrimaryKey = - getFieldOptions(field.getFieldProto()).stream() - .map(Pair::getKey) - .anyMatch(fieldDesc -> fieldDesc.getName().matches("(?i).*primary_?key")); - - List tags = - Stream.concat( - ProtobufExtensionUtil.extractTagPropertiesFromOptions( - getFieldOptions(field.getFieldProto()), context.getGraph().getRegistry()), - promotedTags(field, context)) - .distinct() - .map(tag -> new TagAssociation().setTag(new TagUrn(tag.getName()))) - .sorted(Comparator.comparing(t -> t.getTag().getName())) - .collect(Collectors.toList()); - - List terms = - Stream.concat( - ProtobufExtensionUtil.extractTermAssociationsFromOptions( - getFieldOptions(field.getFieldProto()), context.getGraph().getRegistry()), - promotedTerms(field, context)) - .distinct() - .sorted(Comparator.comparing(a -> a.getUrn().getNameEntity())) - .collect(Collectors.toList()); - - return context - .streamAllPaths(field) - .map( - path -> - Pair.of( - new SchemaField() - .setFieldPath(context.getFieldPath(path)) - .setNullable(!isPrimaryKey) - .setIsPartOfKey(isPrimaryKey) - .setDescription(field.comment()) - .setNativeDataType(field.nativeType()) - .setType(field.schemaFieldDataType()) - .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray(tags))) - .setGlossaryTerms( - new GlossaryTerms() - .setTerms(new GlossaryTermAssociationArray(terms)) - .setAuditStamp(context.getAuditStamp())), - context.calculateSortOrder(path, field))); + boolean isPrimaryKey = getFieldOptions(field.getFieldProto()).stream() + .map(Pair::getKey) + .anyMatch(fieldDesc -> fieldDesc.getName().matches("(?i).*primary_?key")); + + List tags = getTagAssociations(field, context); + List terms = getGlossaryTermAssociations(field, context); + + return context.streamAllPaths(field) + .map(path -> Pair.of(createSchemaField(field, context, path, isPrimaryKey, tags, terms), + context.calculateSortOrder(path, field))); + } + + private SchemaField createSchemaField(ProtobufField field, VisitContext context, + GraphPath path, boolean isPrimaryKey, List tags, + List terms) { + String description = createFieldDescription(field); + + return new SchemaField().setFieldPath(context.getFieldPath(path)) + .setNullable(!isPrimaryKey) + .setIsPartOfKey(isPrimaryKey) + .setDescription(description) + .setNativeDataType(field.nativeType()) + .setType(field.schemaFieldDataType()) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray(tags))) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray(terms)) + .setAuditStamp(context.getAuditStamp())); + } + + private String createFieldDescription(ProtobufField field) { + StringBuilder description = new StringBuilder(field.comment()); + + if (field.isEnum()) { + description.append("\n\n"); + Map enumValuesWithComments = field.getEnumValuesWithComments(); + if (!enumValuesWithComments.isEmpty()) { + appendEnumValues(description, field, enumValuesWithComments); + } + } + + return description.toString(); + } + + private void appendEnumValues(StringBuilder description, ProtobufField field, + Map enumValuesWithComments) { + enumValuesWithComments.forEach((name, comment) -> { + field.getEnumValues().stream().filter(v -> v.getName().equals(name)).findFirst().ifPresent(value -> { + description.append(String.format("%d: %s", value.getNumber(), name)); + if (!comment.isEmpty()) { + description.append(" - ").append(comment); + } + description.append("\n"); + }); + }); + } + + private List getTagAssociations(ProtobufField field, VisitContext context) { + Stream fieldTags = + ProtobufExtensionUtil.extractTagPropertiesFromOptions(getFieldOptions(field.getFieldProto()), + context.getGraph().getRegistry()).map(tag -> new TagAssociation().setTag(new TagUrn(tag.getName()))); + + Stream promotedTags = + promotedTags(field, context).map(tag -> new TagAssociation().setTag(new TagUrn(tag.getName()))); + + return Stream.concat(fieldTags, promotedTags) + .distinct() + .sorted(Comparator.comparing(t -> t.getTag().getName())) + .collect(Collectors.toList()); + } + + private List getGlossaryTermAssociations(ProtobufField field, VisitContext context) { + Stream fieldTerms = + ProtobufExtensionUtil.extractTermAssociationsFromOptions(getFieldOptions(field.getFieldProto()), + context.getGraph().getRegistry()); + + Stream promotedTerms = promotedTerms(field, context); + + return Stream.concat(fieldTerms, promotedTerms) + .distinct() + .sorted(Comparator.comparing(a -> a.getUrn().getNameEntity())) + .collect(Collectors.toList()); } /** @@ -76,12 +120,11 @@ public Stream> visitField(ProtobufField field, VisitCo */ private Stream promotedTags(ProtobufField field, VisitContext context) { if (field.isMessage()) { - return context.getGraph().outgoingEdgesOf(field).stream() - .flatMap( - e -> - ProtobufExtensionUtil.extractTagPropertiesFromOptions( - getMessageOptions(e.getEdgeTarget().messageProto()), - context.getGraph().getRegistry())) + return context.getGraph() + .outgoingEdgesOf(field) + .stream() + .flatMap(e -> ProtobufExtensionUtil.extractTagPropertiesFromOptions( + getMessageOptions(e.getEdgeTarget().messageProto()), context.getGraph().getRegistry())) .distinct(); } else { return Stream.of(); @@ -95,12 +138,11 @@ private Stream promotedTags(ProtobufField field, VisitContext con */ private Stream promotedTerms(ProtobufField field, VisitContext context) { if (field.isMessage()) { - return context.getGraph().outgoingEdgesOf(field).stream() - .flatMap( - e -> - ProtobufExtensionUtil.extractTermAssociationsFromOptions( - getMessageOptions(e.getEdgeTarget().messageProto()), - context.getGraph().getRegistry())) + return context.getGraph() + .outgoingEdgesOf(field) + .stream() + .flatMap(e -> ProtobufExtensionUtil.extractTermAssociationsFromOptions( + getMessageOptions(e.getEdgeTarget().messageProto()), context.getGraph().getRegistry())) .distinct(); } else { return Stream.of(); diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java index 9bf649041e035..78a90048bca59 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java @@ -8,6 +8,7 @@ import com.google.protobuf.ExtensionRegistry; import datahub.protobuf.model.ProtobufGraph; import java.io.IOException; +import java.util.Arrays; import org.testng.annotations.Test; public class ProtobufUtilsTest { @@ -44,4 +45,18 @@ public void registryTest() throws IOException, IllegalArgumentException { + "[meta.msg.repeat_enum]: EVENT\n", graph.root().messageProto().getOptions().toString()); } -} + + @Test + public void testCollapseLocationCommentsWithUTF8() { + DescriptorProtos.SourceCodeInfo.Location location = DescriptorProtos.SourceCodeInfo.Location.newBuilder() + .addAllLeadingDetachedComments(Arrays.asList("/* Emoji 😊 */", "/* Accented é */")) + .setLeadingComments("/* Chinese 你好 */\n// Russian Привет") + .setTrailingComments("// Korean 안녕") + .build(); + + String actual = ProtobufUtils.collapseLocationComments(location); + String expected = "Emoji 😊 */\nAccented é */\nChinese 你好 */\nRussian Привет\nKorean 안녕"; + + assertEquals(expected, actual); + } +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java index 40d54a8651012..78f01e020111f 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java @@ -355,4 +355,20 @@ public void nestedTypeReservedFieldsTest() throws IOException { assertEquals("test comment 14", msg3Field14.getDescription()); } -} + + @Test + public void timestampUnitEnumDescriptionTest() throws IOException { + ProtobufDataset test = getTestProtobufDataset("extended_protobuf", "messageE"); + SchemaMetadata testMetadata = test.getSchemaMetadata(); + + SchemaField timestampField = testMetadata.getFields() + .stream() + .filter(v -> v.getFieldPath() + .equals("[version=2.0].[type=extended_protobuf_TimestampUnitMessage].[type=enum].timestamp_unit_type")) + .findFirst() + .orElseThrow(); + + assertEquals("timestamp unit\n" + "\n" + "0: MILLISECOND - 10^-3 seconds\n" + "1: MICROSECOND - 10^-6 seconds\n" + + "2: NANOSECOND - 10^-9 seconds\n", timestampField.getDescription()); + } +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.proto new file mode 100644 index 0000000000000..50425f431c62d --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; +package extended_protobuf; + +/* + Timestamp unit enum +*/ +enum TimestampUnitEnum { + MILLISECOND = 0; // 10^-3 seconds + MICROSECOND = 1; // 10^-6 seconds + NANOSECOND = 2; // 10^-9 seconds +} + +/* + Timestamp unit message +*/ +message TimestampUnitMessage { + // timestamp unit + TimestampUnitEnum timestamp_unit_type = 1; +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.protoc new file mode 100644 index 0000000000000..1cefa365ef178 Binary files /dev/null and b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageE.protoc differ