From 916d1947af1669a5d219d1f840eae45a67a007f7 Mon Sep 17 00:00:00 2001 From: Guillaume Smet Date: Sat, 23 Nov 2024 18:30:04 +0100 Subject: [PATCH 1/3] Config Doc - Fix conversion of HTML Javadoc Converting HTML to Asciidoc using the Javadoc elements was causing issues as the HTML would be split and the fragments would be handled without the knowledge of the HTML tree. --- .../JavadocToAsciidocTransformer.java | 66 +++++++++++++------ 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java index 5007f9ce71bc0..8e2f07e13ddab 100644 --- a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java +++ b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java @@ -1,5 +1,8 @@ package io.quarkus.annotation.processor.documentation.config.formatter; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.Jsoup; @@ -62,6 +65,8 @@ public final class JavadocToAsciidocTransformer { private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____"; private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____"; + private static final Pattern INLINE_TAG_MARKER_PATTERN = Pattern.compile("§§([0-9]+)§§"); + private JavadocToAsciidocTransformer() { } @@ -84,7 +89,11 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in // we add it as it has been previously removed Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* ")); + StringBuilder htmlJavadoc = new StringBuilder(javadoc.length()); + + int markerCounter = 0; StringBuilder sb = new StringBuilder(); + Map inlineTagsReplacements = new TreeMap<>(); for (JavadocDescriptionElement javadocDescriptionElement : parsedJavadoc.getDescription().getElements()) { if (javadocDescriptionElement instanceof JavadocInlineTag) { @@ -95,40 +104,56 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in case VALUE: case LITERAL: case SYSTEM_PROPERTY: + sb.setLength(0); sb.append('`'); appendEscapedAsciiDoc(sb, content, inlineMacroMode); sb.append('`'); + htmlJavadoc.append("§§" + markerCounter + "§§"); + inlineTagsReplacements.put(markerCounter, sb.toString()); + markerCounter++; break; case LINK: case LINKPLAIN: if (content.startsWith(HASH)) { content = ConfigNamingUtil.hyphenate(content.substring(1)); } + sb.setLength(0); sb.append('`'); appendEscapedAsciiDoc(sb, content, inlineMacroMode); sb.append('`'); + htmlJavadoc.append("§§" + markerCounter + "§§"); + inlineTagsReplacements.put(markerCounter, sb.toString()); + markerCounter++; break; default: - sb.append(content); + htmlJavadoc.append(content); break; } } else { - appendHtml(sb, Jsoup.parseBodyFragment(javadocDescriptionElement.toText()), inlineMacroMode); + htmlJavadoc.append(javadocDescriptionElement.toText()); } } - String asciidoc = trim(sb); + StringBuilder asciidocSb = new StringBuilder(); + htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode); + String asciidoc = trim(asciidocSb); + + // not very optimal and could be included in htmlToAsciidoc() but simpler so let's go for it + if (!inlineTagsReplacements.isEmpty()) { + asciidoc = INLINE_TAG_MARKER_PATTERN.matcher(asciidoc) + .replaceAll(mr -> Matcher.quoteReplacement(inlineTagsReplacements.get(Integer.valueOf(mr.group(1))))); + } return asciidoc.isBlank() ? null : asciidoc; } - private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroMode) { + private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode) { for (Node childNode : node.childNodes()) { switch (childNode.nodeName()) { case PARAGRAPH_NODE: newLine(sb); newLine(sb); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); break; case PREFORMATED_NODE: newLine(sb); @@ -148,7 +173,7 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM newLine(sb); sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE); newLine(sb); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); newLineIfNeeded(sb); sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END); newLine(sb); @@ -157,67 +182,68 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM case ORDERED_LIST_NODE: case UN_ORDERED_LIST_NODE: newLine(sb); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); + newLine(sb); break; case LIST_ITEM_NODE: - final String marker = childNode.parent().nodeName().equals(ORDERED_LIST_NODE) + final String marker = childNode.parentNode().nodeName().equals(ORDERED_LIST_NODE) ? ORDERED_LIST_ITEM_ASCIDOC_STYLE : UNORDERED_LIST_ITEM_ASCIDOC_STYLE; newLine(sb); sb.append(marker); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); break; case LINK_NODE: final String link = childNode.attr(HREF_ATTRIBUTE); sb.append("link:"); sb.append(link); final StringBuilder caption = new StringBuilder(); - appendHtml(caption, childNode, inlineMacroMode); + htmlToAsciidoc(caption, childNode, inlineMacroMode); sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption))); break; case CODE_NODE: sb.append(BACKTICK); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(BACKTICK); break; case BOLD_NODE: case STRONG_NODE: sb.append(STAR); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(STAR); break; case EMPHASIS_NODE: case ITALICS_NODE: sb.append(UNDERSCORE); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(UNDERSCORE); break; case UNDERLINE_NODE: sb.append(UNDERLINE_ASCIDOC_STYLE); sb.append(HASH); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(HASH); break; case SMALL_NODE: sb.append(SMALL_ASCIDOC_STYLE); sb.append(HASH); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(HASH); break; case BIG_NODE: sb.append(BIG_ASCIDOC_STYLE); sb.append(HASH); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(HASH); break; case SUB_SCRIPT_NODE: sb.append(SUB_SCRIPT_ASCIDOC_STYLE); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(SUB_SCRIPT_ASCIDOC_STYLE); break; case SUPER_SCRIPT_NODE: sb.append(SUPER_SCRIPT_ASCIDOC_STYLE); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(SUPER_SCRIPT_ASCIDOC_STYLE); break; case DEL_NODE: @@ -225,7 +251,7 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM case STRIKE_NODE: sb.append(LINE_THROUGH_ASCIDOC_STYLE); sb.append(HASH); - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); sb.append(HASH); break; case NEW_LINE_NODE: @@ -249,7 +275,7 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM appendEscapedAsciiDoc(sb, text, inlineMacroMode); break; default: - appendHtml(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode); break; } } From 10ddc12e3931b75d701f2c3707361a294913fbda Mon Sep 17 00:00:00 2001 From: Guillaume Smet Date: Mon, 25 Nov 2024 10:08:00 +0100 Subject: [PATCH 2/3] Config Doc - Add basic support for HTML tables -> AsciiDoc tables It will only handle very simple cases but it makes the doc for quarkus.native.resources.includes a lot better. --- .../JavadocToAsciidocTransformer.java | 130 +++++++++++++++--- 1 file changed, 109 insertions(+), 21 deletions(-) diff --git a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java index 8e2f07e13ddab..adf98de79d86c 100644 --- a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java +++ b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java @@ -50,6 +50,12 @@ public final class JavadocToAsciidocTransformer { private static final String UN_ORDERED_LIST_NODE = "ul"; private static final String PREFORMATED_NODE = "pre"; private static final String BLOCKQUOTE_NODE = "blockquote"; + private static final String TABLE_NODE = "table"; + private static final String THEAD_NODE = "thead"; + private static final String TBODY_NODE = "tbody"; + private static final String TR_NODE = "tr"; + private static final String TH_NODE = "th"; + private static final String TD_NODE = "td"; private static final String BIG_ASCIDOC_STYLE = "[.big]"; private static final String LINK_ATTRIBUTE_FORMAT = "[%s]"; @@ -64,6 +70,9 @@ public final class JavadocToAsciidocTransformer { private static final String CODE_BLOCK_ASCIDOC_STYLE = "```"; private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____"; private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____"; + private static final String TABLE_MARKER = "!==="; + private static final String COLUMN_HEADER_MARKER = "h!"; + private static final String COLUMN_MARKER = "!"; private static final Pattern INLINE_TAG_MARKER_PATTERN = Pattern.compile("§§([0-9]+)§§"); @@ -106,7 +115,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in case SYSTEM_PROPERTY: sb.setLength(0); sb.append('`'); - appendEscapedAsciiDoc(sb, content, inlineMacroMode); + appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context()); sb.append('`'); htmlJavadoc.append("§§" + markerCounter + "§§"); inlineTagsReplacements.put(markerCounter, sb.toString()); @@ -119,7 +128,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in } sb.setLength(0); sb.append('`'); - appendEscapedAsciiDoc(sb, content, inlineMacroMode); + appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context()); sb.append('`'); htmlJavadoc.append("§§" + markerCounter + "§§"); inlineTagsReplacements.put(markerCounter, sb.toString()); @@ -135,7 +144,7 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in } StringBuilder asciidocSb = new StringBuilder(); - htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode); + htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode, new Context()); String asciidoc = trim(asciidocSb); // not very optimal and could be included in htmlToAsciidoc() but simpler so let's go for it @@ -147,13 +156,13 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in return asciidoc.isBlank() ? null : asciidoc; } - private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode) { + private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode, Context context) { for (Node childNode : node.childNodes()) { switch (childNode.nodeName()) { case PARAGRAPH_NODE: newLine(sb); newLine(sb); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); break; case PREFORMATED_NODE: newLine(sb); @@ -173,7 +182,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa newLine(sb); sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE); newLine(sb); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); newLineIfNeeded(sb); sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END); newLine(sb); @@ -182,7 +191,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa case ORDERED_LIST_NODE: case UN_ORDERED_LIST_NODE: newLine(sb); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); newLine(sb); break; case LIST_ITEM_NODE: @@ -191,59 +200,59 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa : UNORDERED_LIST_ITEM_ASCIDOC_STYLE; newLine(sb); sb.append(marker); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); break; case LINK_NODE: final String link = childNode.attr(HREF_ATTRIBUTE); sb.append("link:"); sb.append(link); final StringBuilder caption = new StringBuilder(); - htmlToAsciidoc(caption, childNode, inlineMacroMode); + htmlToAsciidoc(caption, childNode, inlineMacroMode, context); sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption))); break; case CODE_NODE: sb.append(BACKTICK); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(BACKTICK); break; case BOLD_NODE: case STRONG_NODE: sb.append(STAR); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(STAR); break; case EMPHASIS_NODE: case ITALICS_NODE: sb.append(UNDERSCORE); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(UNDERSCORE); break; case UNDERLINE_NODE: sb.append(UNDERLINE_ASCIDOC_STYLE); sb.append(HASH); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(HASH); break; case SMALL_NODE: sb.append(SMALL_ASCIDOC_STYLE); sb.append(HASH); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(HASH); break; case BIG_NODE: sb.append(BIG_ASCIDOC_STYLE); sb.append(HASH); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(HASH); break; case SUB_SCRIPT_NODE: sb.append(SUB_SCRIPT_ASCIDOC_STYLE); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(SUB_SCRIPT_ASCIDOC_STYLE); break; case SUPER_SCRIPT_NODE: sb.append(SUPER_SCRIPT_ASCIDOC_STYLE); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(SUPER_SCRIPT_ASCIDOC_STYLE); break; case DEL_NODE: @@ -251,7 +260,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa case STRIKE_NODE: sb.append(LINE_THROUGH_ASCIDOC_STYLE); sb.append(HASH); - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); sb.append(HASH); break; case NEW_LINE_NODE: @@ -272,10 +281,58 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa text = startingSpaceMatcher.replaceFirst(""); } - appendEscapedAsciiDoc(sb, text, inlineMacroMode); + appendEscapedAsciiDoc(sb, text, inlineMacroMode, context); + break; + case TABLE_NODE: + newLine(sb); + newLine(sb); + sb.append(TABLE_MARKER); + newLine(sb); + context.inTable = true; + context.firstTableRow = true; + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + context.inTable = false; + context.firstTableRow = false; + sb.append(TABLE_MARKER); + newLine(sb); + break; + case THEAD_NODE: + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + break; + case TBODY_NODE: + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + break; + case TR_NODE: + trimTrailingWhitespaces(sb); + if (!context.firstTableRow) { + newLine(sb); + } + newLine(sb); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + context.firstTableRow = false; + break; + case TH_NODE: + if (!context.firstTableRow) { + sb.append(COLUMN_HEADER_MARKER); + } else { + sb.append(COLUMN_MARKER); + } + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + trimTrailingWhitespaces(sb); + if (!context.firstTableRow) { + newLine(sb); + } + break; + case TD_NODE: + sb.append(COLUMN_MARKER); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); + trimTrailingWhitespaces(sb); + if (!context.firstTableRow) { + newLine(sb); + } break; default: - htmlToAsciidoc(sb, childNode, inlineMacroMode); + htmlToAsciidoc(sb, childNode, inlineMacroMode, context); break; } } @@ -351,6 +408,20 @@ private static StringBuilder trimText(StringBuilder sb, String charsToTrim) { return sb; } + private static void trimTrailingWhitespaces(StringBuilder sb) { + int j = -1; + for (int i = sb.length() - 1; i >= 0; i--) { + if (Character.isWhitespace(sb.charAt(i))) { + j = i; + } else { + break; + } + } + if (j >= 0) { + sb.setLength(j); + } + } + private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) { int i = 0; /* trim leading whitespace */ @@ -417,7 +488,8 @@ private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) return sb; } - private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode) { + private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode, + Context context) { boolean escaping = false; for (int i = 0; i < text.length(); i++) { final char ch = text.charAt(i); @@ -453,6 +525,16 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text } sb.append("{plus}"); break; + case '!': + if (escaping) { + sb.append("++"); + escaping = false; + } + if (context.inTable) { + sb.append('\\'); + } + sb.append(ch); + break; default: if (escaping) { sb.append("++"); @@ -466,4 +548,10 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text } return sb; } + + private static class Context { + + boolean inTable; + boolean firstTableRow; + } } From c4ba187110f0bec6e8f5a240182cfb3c9f873519 Mon Sep 17 00:00:00 2001 From: Guillaume Smet Date: Mon, 25 Nov 2024 12:38:07 +0100 Subject: [PATCH 3/3] Config Doc - Enforce a new line after ol/ul --- .../config/formatter/JavadocToAsciidocTransformer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java index adf98de79d86c..19a23c1a0427d 100644 --- a/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java +++ b/core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToAsciidocTransformer.java @@ -193,6 +193,7 @@ private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMa newLine(sb); htmlToAsciidoc(sb, childNode, inlineMacroMode, context); newLine(sb); + newLine(sb); break; case LIST_ITEM_NODE: final String marker = childNode.parentNode().nodeName().equals(ORDERED_LIST_NODE)