Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Config Doc - Improve HTML Javadoc -> Asciidoc conversion #44679

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.quarkus.annotation.processor.documentation.config.formatter;

import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
Expand Down Expand Up @@ -47,6 +50,12 @@ public final class JavadocToAsciidocTransformer {
private static final String UN_ORDERED_LIST_NODE = "ul";
private static final String PREFORMATED_NODE = "pre";
private static final String BLOCKQUOTE_NODE = "blockquote";
private static final String TABLE_NODE = "table";
private static final String THEAD_NODE = "thead";
private static final String TBODY_NODE = "tbody";
private static final String TR_NODE = "tr";
private static final String TH_NODE = "th";
private static final String TD_NODE = "td";

private static final String BIG_ASCIDOC_STYLE = "[.big]";
private static final String LINK_ATTRIBUTE_FORMAT = "[%s]";
Expand All @@ -61,6 +70,11 @@ public final class JavadocToAsciidocTransformer {
private static final String CODE_BLOCK_ASCIDOC_STYLE = "```";
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____";
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____";
private static final String TABLE_MARKER = "!===";
private static final String COLUMN_HEADER_MARKER = "h!";
private static final String COLUMN_MARKER = "!";

private static final Pattern INLINE_TAG_MARKER_PATTERN = Pattern.compile("§§([0-9]+)§§");

private JavadocToAsciidocTransformer() {
}
Expand All @@ -84,7 +98,11 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
// we add it as it has been previously removed
Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* "));

StringBuilder htmlJavadoc = new StringBuilder(javadoc.length());

int markerCounter = 0;
StringBuilder sb = new StringBuilder();
Map<Integer, String> inlineTagsReplacements = new TreeMap<>();

for (JavadocDescriptionElement javadocDescriptionElement : parsedJavadoc.getDescription().getElements()) {
if (javadocDescriptionElement instanceof JavadocInlineTag) {
Expand All @@ -95,40 +113,56 @@ public static String toAsciidoc(String javadoc, JavadocFormat format, boolean in
case VALUE:
case LITERAL:
case SYSTEM_PROPERTY:
sb.setLength(0);
sb.append('`');
appendEscapedAsciiDoc(sb, content, inlineMacroMode);
appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context());
sb.append('`');
htmlJavadoc.append("§§" + markerCounter + "§§");
inlineTagsReplacements.put(markerCounter, sb.toString());
markerCounter++;
break;
case LINK:
case LINKPLAIN:
if (content.startsWith(HASH)) {
content = ConfigNamingUtil.hyphenate(content.substring(1));
}
sb.setLength(0);
sb.append('`');
appendEscapedAsciiDoc(sb, content, inlineMacroMode);
appendEscapedAsciiDoc(sb, content, inlineMacroMode, new Context());
sb.append('`');
htmlJavadoc.append("§§" + markerCounter + "§§");
inlineTagsReplacements.put(markerCounter, sb.toString());
markerCounter++;
break;
default:
sb.append(content);
htmlJavadoc.append(content);
break;
}
} else {
appendHtml(sb, Jsoup.parseBodyFragment(javadocDescriptionElement.toText()), inlineMacroMode);
htmlJavadoc.append(javadocDescriptionElement.toText());
}
}

String asciidoc = trim(sb);
StringBuilder asciidocSb = new StringBuilder();
htmlToAsciidoc(asciidocSb, Jsoup.parseBodyFragment(htmlJavadoc.toString()), inlineMacroMode, new Context());
String asciidoc = trim(asciidocSb);

// not very optimal and could be included in htmlToAsciidoc() but simpler so let's go for it
if (!inlineTagsReplacements.isEmpty()) {
asciidoc = INLINE_TAG_MARKER_PATTERN.matcher(asciidoc)
.replaceAll(mr -> Matcher.quoteReplacement(inlineTagsReplacements.get(Integer.valueOf(mr.group(1)))));
}

return asciidoc.isBlank() ? null : asciidoc;
}

private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroMode) {
private static void htmlToAsciidoc(StringBuilder sb, Node node, boolean inlineMacroMode, Context context) {
for (Node childNode : node.childNodes()) {
switch (childNode.nodeName()) {
case PARAGRAPH_NODE:
newLine(sb);
newLine(sb);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case PREFORMATED_NODE:
newLine(sb);
Expand All @@ -148,7 +182,7 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM
newLine(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
newLineIfNeeded(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END);
newLine(sb);
Expand All @@ -157,75 +191,77 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM
case ORDERED_LIST_NODE:
case UN_ORDERED_LIST_NODE:
newLine(sb);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
newLine(sb);
newLine(sb);
break;
case LIST_ITEM_NODE:
final String marker = childNode.parent().nodeName().equals(ORDERED_LIST_NODE)
final String marker = childNode.parentNode().nodeName().equals(ORDERED_LIST_NODE)
? ORDERED_LIST_ITEM_ASCIDOC_STYLE
: UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
newLine(sb);
sb.append(marker);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case LINK_NODE:
final String link = childNode.attr(HREF_ATTRIBUTE);
sb.append("link:");
sb.append(link);
final StringBuilder caption = new StringBuilder();
appendHtml(caption, childNode, inlineMacroMode);
htmlToAsciidoc(caption, childNode, inlineMacroMode, context);
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption)));
break;
case CODE_NODE:
sb.append(BACKTICK);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(BACKTICK);
break;
case BOLD_NODE:
case STRONG_NODE:
sb.append(STAR);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(STAR);
break;
case EMPHASIS_NODE:
case ITALICS_NODE:
sb.append(UNDERSCORE);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(UNDERSCORE);
break;
case UNDERLINE_NODE:
sb.append(UNDERLINE_ASCIDOC_STYLE);
sb.append(HASH);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case SMALL_NODE:
sb.append(SMALL_ASCIDOC_STYLE);
sb.append(HASH);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case BIG_NODE:
sb.append(BIG_ASCIDOC_STYLE);
sb.append(HASH);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case SUB_SCRIPT_NODE:
sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
break;
case SUPER_SCRIPT_NODE:
sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
break;
case DEL_NODE:
case S_NODE:
case STRIKE_NODE:
sb.append(LINE_THROUGH_ASCIDOC_STYLE);
sb.append(HASH);
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
sb.append(HASH);
break;
case NEW_LINE_NODE:
Expand All @@ -246,10 +282,58 @@ private static void appendHtml(StringBuilder sb, Node node, boolean inlineMacroM
text = startingSpaceMatcher.replaceFirst("");
}

appendEscapedAsciiDoc(sb, text, inlineMacroMode);
appendEscapedAsciiDoc(sb, text, inlineMacroMode, context);
break;
case TABLE_NODE:
newLine(sb);
newLine(sb);
sb.append(TABLE_MARKER);
newLine(sb);
context.inTable = true;
context.firstTableRow = true;
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
context.inTable = false;
context.firstTableRow = false;
sb.append(TABLE_MARKER);
newLine(sb);
break;
case THEAD_NODE:
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case TBODY_NODE:
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
case TR_NODE:
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
newLine(sb);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
context.firstTableRow = false;
break;
case TH_NODE:
if (!context.firstTableRow) {
sb.append(COLUMN_HEADER_MARKER);
} else {
sb.append(COLUMN_MARKER);
}
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
break;
case TD_NODE:
sb.append(COLUMN_MARKER);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
trimTrailingWhitespaces(sb);
if (!context.firstTableRow) {
newLine(sb);
}
break;
default:
appendHtml(sb, childNode, inlineMacroMode);
htmlToAsciidoc(sb, childNode, inlineMacroMode, context);
break;
}
}
Expand Down Expand Up @@ -325,6 +409,20 @@ private static StringBuilder trimText(StringBuilder sb, String charsToTrim) {
return sb;
}

private static void trimTrailingWhitespaces(StringBuilder sb) {
int j = -1;
for (int i = sb.length() - 1; i >= 0; i--) {
if (Character.isWhitespace(sb.charAt(i))) {
j = i;
} else {
break;
}
}
if (j >= 0) {
sb.setLength(j);
}
}

private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) {
int i = 0;
/* trim leading whitespace */
Expand Down Expand Up @@ -391,7 +489,8 @@ private static StringBuilder unescapeHtmlEntities(StringBuilder sb, String text)
return sb;
}

private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode) {
private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text, boolean inlineMacroMode,
Context context) {
boolean escaping = false;
for (int i = 0; i < text.length(); i++) {
final char ch = text.charAt(i);
Expand Down Expand Up @@ -427,6 +526,16 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text
}
sb.append("{plus}");
break;
case '!':
if (escaping) {
sb.append("++");
escaping = false;
}
if (context.inTable) {
sb.append('\\');
}
sb.append(ch);
break;
default:
if (escaping) {
sb.append("++");
Expand All @@ -440,4 +549,10 @@ private static StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text
}
return sb;
}

private static class Context {

boolean inTable;
boolean firstTableRow;
}
}
Loading