Working on javadoc image fix

Image extraction works, regular paths work too. No tests. Need to figure out how to get path of source jar. Fixes eclipse-jdtls#1007 Signed-off-by: Nikolas Komonen <[email protected]>
NikolasKomonen · Aug 1, 2019 · 1a04501 · 1a04501
1 parent d15cf6a
commit 1a04501
Show file tree

Hide file tree

Showing 3 changed files with 317 additions and 0 deletions.
diff --git a/...e.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavaDoc2MarkdownConverter.java b/...e.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavaDoc2MarkdownConverter.java
@@ -51,6 +51,9 @@ public class JavaDoc2MarkdownConverter extends AbstractJavaDocConverter {
 			Whitelist w = (Whitelist) whitelistField.get(c);
 
 			w.addProtocols("a", "href", "file", "jdt");
+
+			//Allow all types of paths, not only URI's with http or https protocol
+			w.removeProtocols("img", "src", "http", "https");
 		} catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
 			JavaLanguageServerPlugin.logException("Unable to modify jsoup to include file and jdt protocols", e);
 		}

diff --git a/...ipse.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavaDocHTMLPathHandler.java b/...ipse.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavaDocHTMLPathHandler.java
@@ -0,0 +1,310 @@
+/*******************************************************************************
+ * Copyright (c) 2019 Red Hat Inc. and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     Red Hat Inc. - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.jdt.ls.core.internal.javadoc;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.util.jar.JarFile;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+
+import org.eclipse.core.internal.runtime.InternalPlatform;
+import org.eclipse.core.runtime.IPath;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.jdt.core.IJavaElement;
+import org.eclipse.jdt.core.JavaModelException;
+import org.eclipse.jdt.core.dom.TextElement;
+import org.eclipse.jdt.internal.core.PackageFragment;
+import org.eclipse.jdt.ls.core.internal.IConstants;
+
+
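+/**
+ * Utility for HTML tags found in Javadoc text: detects the supported tags (see
+ * {@link #tags}) and rewrites a relative 'src' attribute so that it points to a
+ * copy of the referenced file extracted from the containing jar.
+ *
+ * @author Nikolas Komonen - [email protected]
+ *
+ */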
+public class JavaDocHTMLPathHandler {
+
+	public static final String[] tags = { "img" };
+
+	/**
+	 * Returns true if the text is an HTML tag whose name is listed in
+	 * {@link JavaDocHTMLPathHandler#tags}.
+	 *
+	 * The text has to start with {@code <} and end with {@code >}. The tag name
+	 * is the text between the opening {@code <} and the first whitespace
+	 * character, or the closing {@code >} if there is no whitespace. The
+	 * comparison with {@link JavaDocHTMLPathHandler#tags} is case sensitive.
+	 *
+	 * @param text
+	 *            the text to inspect, may be null
+	 * @return true if text is a tag with one of the supported names, false
+	 *         otherwise (including for null)
+	 */
+	public static boolean isHTMLTag(String text) {
+
+		//Both delimiters are required; a missing one means this is not a complete tag
+		if (text == null || !text.startsWith("<") || !text.endsWith(">")) {
+			return false;
+		}
+
+		//find the end index of the tag name
+		int i;
+		for (i = 1; i < text.length() - 1; i++) { // - 1 to exclude '>'
+			if (Character.isWhitespace(text.charAt(i))) {
+				break;
+			}
+		}
+
+		if (i == 1) {
+			return false; //no tag name, e.g. "<>" or "< img>"
+		}
+
+		String tagName = text.substring(1, i);
+
+		for (String tag : tags) {
+			if (tag.equals(tagName)) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Given a {@link TextElement} that represents an HTML tag with a 'src'
+	 * attribute, rewrites a relative 'src' path so that it points to a local copy
+	 * of the referenced file.
+	 *
+	 * The file is looked up in the javadoc jar first and then in the jar given by
+	 * the path of the enclosing package fragment. It is extracted to
+	 * "extracted-jar-images/{jar name}/{file name}" below the plugin state
+	 * location. A file that was extracted earlier is only overwritten when the jar
+	 * has been modified more recently than the extracted copy.
+	 *
+	 * This is a best-effort operation: the original text is returned unchanged
+	 * when there is no usable 'src' attribute, when the path does not need to be
+	 * rewritten, or when the file cannot be located or extracted.
+	 *
+	 * @param child
+	 *            the text element holding the HTML tag
+	 * @param fElement
+	 *            the Java element whose Javadoc contains the tag
+	 * @return the text of the tag, with the 'src' value replaced by a file URI
+	 *         when the file is available locally, otherwise the original text
+	 */
+	public static String getValidatedHTMLSrcAttribute(TextElement child, IJavaElement fElement) {
+
+		//Check if current src attribute path needs to be validated
+		String text = child.getText();
+		int offsets[] = extractSourcePathFromHTMLTag(text);
+		if (offsets == null || offsets[1] < offsets[0]) {
+			return text; //No src attribute, or its closing quote is missing
+		}
+		String srcPath = text.substring(offsets[0], offsets[1]);
+
+		//Despite its name, isPathAbsolute returns true for relative paths, which are the ones to extract
+		if (!isPathAbsolute(srcPath)) {
+			return text; //Current path is good as is.
+		}
+
+		//Only the last segment is used for the output file, so "../" in srcPath cannot escape the output folder
+		String fileName = new File(srcPath).getName();
+		if (fileName.isEmpty() || ".".equals(fileName) || "..".equals(fileName)) {
+			return text;
+		}
+
+		//Get the initial internal jar fragment
+		IJavaElement javaElement = fElement == null ? null : fElement.getParent();
+		while (javaElement != null && !(javaElement instanceof PackageFragment)) {
+			javaElement = javaElement.getParent();
+		}
+
+		if (javaElement == null) {
+			return text;
+		}
+
+		PackageFragment jarFragment = (PackageFragment) javaElement;
+
+		//Package names are separated by '.', jar entry names always by '/' (on every platform)
+		String fragmentName = jarFragment.getElementName();
+		String fragmentPath = fragmentName.isEmpty() ? "" : fragmentName.replace('.', '/') + "/";
+		String relativeToJarPath = fragmentPath + srcPath;
+
+		JarFile jar = null;
+		try {
+			String currentJarPath = null;
+
+			//Attempt to get file from javadoc jar (not class or source jar)
+			URL javadocJarBaseLocationURL = JavaDocLocations.getJavadocBaseLocation(jarFragment);
+			if (javadocJarBaseLocationURL != null) {
+				currentJarPath = getJarPathFromURI(javadocJarBaseLocationURL.toURI());
+				jar = openJarContaining(currentJarPath, relativeToJarPath);
+			}
+
+			//No file was in the javadoc jar, try the source jar
+			if (jar == null) {
+				URI sourceJarBaseLocationURI = new URI(jarFragment.getPath().toOSString());
+				currentJarPath = getJarPathFromURI(sourceJarBaseLocationURI);
+				jar = openJarContaining(currentJarPath, relativeToJarPath);
+			}
+
+			if (jar == null) {
+				return text; //File from source path could not be located in either jar
+			}
+
+			ZipEntry currentZipEntry = jar.getEntry(relativeToJarPath);
+			if (currentZipEntry == null) {
+				return text;
+			}
+
+			//Create new path to extract images to
+			IPath stateLocationPath = InternalPlatform.getDefault().getStateLocation(Platform.getBundle(IConstants.PLUGIN_ID));
+
+			String jarRootName = jarFragment.getPackageFragmentRoot().getElementName();
+			if (jarRootName.endsWith(".jar")) {
+				jarRootName = jarRootName.substring(0, jarRootName.length() - 4);
+			}
+
+			//Path to the extracted file
+			String outputPath = stateLocationPath.toOSString() + "/extracted-jar-images/" + jarRootName + "/" + fileName;
+			File outputFile = new File(outputPath);
+
+			//Extract if the file is missing, or if the jar was modified after the last extraction
+			boolean needsExtraction = !outputFile.exists();
+			if (!needsExtraction) {
+				FileTime extractedFileTime = Files.readAttributes(outputFile.toPath(), BasicFileAttributes.class).lastModifiedTime();
+				FileTime jarFileTime = Files.readAttributes(Paths.get(currentJarPath), BasicFileAttributes.class).lastModifiedTime();
+				needsExtraction = jarFileTime.compareTo(extractedFileTime) > 0;
+			}
+
+			if (needsExtraction) {
+				try (InputStream is = jar.getInputStream(currentZipEntry)) {
+					if (!extractFileTo(is, outputPath)) {
+						return text; //Do not point the tag at a file that was not written
+					}
+				}
+			}
+
+			//Insert new path into text, as a properly encoded file URI
+			return text.substring(0, offsets[0]) + outputFile.toPath().toUri().toString() + text.substring(offsets[1]);
+
+		} catch (JavaModelException | IOException | URISyntaxException e) {
+			return text; //Best effort: keep the original tag if anything goes wrong
+		} finally {
+			//cleanup
+			if (jar != null) {
+				try {
+					jar.close();
+				} catch (IOException e) {
+					//Nothing useful can be done if closing fails
+				}
+			}
+		}
+	}
+
+	/**
+	 * Opens the jar at the given path if it contains the given entry.
+	 *
+	 * @param jarPath
+	 *            file system path of the jar, may be null
+	 * @param entryName
+	 *            name of the entry that has to be present
+	 * @return the open jar, which the caller has to close, or null if the path is
+	 *         null, the jar cannot be opened or the entry is missing
+	 */
+	private static JarFile openJarContaining(String jarPath, String entryName) {
+		if (jarPath == null) {
+			return null;
+		}
+
+		JarFile candidate;
+		try {
+			candidate = new JarFile(jarPath);
+		} catch (IOException e) {
+			return null; //Missing or unreadable jar
+		}
+
+		if (candidate.getEntry(entryName) != null) {
+			return candidate;
+		}
+
+		try {
+			candidate.close();
+		} catch (IOException e) {
+			//Nothing useful can be done if closing fails
+		}
+		return null;
+	}
+
+	/** Matches the start of a src attribute up to and including its opening quote. */
+	private static final Pattern SRC_ATTRIBUTE_START = Pattern.compile("(src\\s*=\\s*['\"])");
+
+	/**
+	 * Gets the position between the quotes of a src attribute. Will look for
+	 * something similar to (src="...") and extract the path from inside. Both
+	 * single and double quotes are accepted, the closing quote has to be of the
+	 * same kind as the opening one.
+	 *
+	 * Start offset is at offsets[0], after the start quotation. End offset is at
+	 * offsets[1], before the end quotation.
+	 *
+	 * Offsets are at '|':
+	 *
+	 * src="|nikolas/wrote/this|"
+	 *
+	 * If the src attribute cannot be found, or its closing quote is missing, null
+	 * is returned.
+	 *
+	 * @param text
+	 *            the text of the HTML tag, may be null
+	 * @return int[] with start and end offset of src attribute value, else null.
+	 */
+	public static int[] extractSourcePathFromHTMLTag(String text) {
+		if (text == null) {
+			return null;
+		}
+
+		Matcher m = SRC_ATTRIBUTE_START.matcher(text);
+		if (!m.find()) {
+			return null;
+		}
+
+		int srcStartQuote = m.end();
+		char quote = text.charAt(srcStartQuote - 1);
+		int srcEndQuote = text.indexOf(quote, srcStartQuote);
+		if (srcEndQuote == -1) {
+			return null; //Unterminated attribute value, there is no valid range to report
+		}
+
+		return new int[] { srcStartQuote, srcEndQuote };
+	}
+
+	/**
+	 * Extracts the path of a jar file from a URI.
+	 *
+	 * The scheme of the URI is dropped, as is a nested scheme such as the "file:"
+	 * in "jar:file:/lib/a.jar!/". Everything after the last ".jar" (for example
+	 * "!/" and an entry path) is cut off.
+	 *
+	 * @param uri
+	 *            the URI pointing to, or into, a jar; may be null
+	 * @return the path up to and including ".jar", or null if the URI is null or
+	 *         does not contain ".jar"
+	 */
+	public static String getJarPathFromURI(URI uri) {
+		if (uri == null) {
+			return null;
+		}
+
+		String pathWithScheme = uri.getSchemeSpecificPart();
+		if (pathWithScheme == null) {
+			return null;
+		}
+
+		//Only strip a real scheme prefix. A colon further inside the path, such as
+		//the drive letter in "/C:/lib/a.jar", is part of the path and has to stay.
+		int colon = pathWithScheme.indexOf(':');
+		boolean hasNestedScheme = colon > 1;
+		for (int i = 0; hasNestedScheme && i < colon; i++) {
+			char c = pathWithScheme.charAt(i);
+			hasNestedScheme = Character.isLetter(c) || (i > 0 && (Character.isDigit(c) || c == '+' || c == '-' || c == '.'));
+		}
+		String finalJarRootPath = hasNestedScheme ? pathWithScheme.substring(colon + 1) : pathWithScheme;
+
+		//clean up/verify the jar path
+		int actualJarIndex = finalJarRootPath.lastIndexOf(".jar");
+
+		if (actualJarIndex == -1) {
+			return null;
+		}
+
+		return finalJarRootPath.substring(0, actualJarIndex + 4);
+	}
+
+	/**
+	 * Decides whether a 'src' path has to be resolved against the jar, i.e.
+	 * whether it is a relative path. The path can be in the format of a URI or a
+	 * local OS specific path.
+	 *
+	 * NOTE: despite the name, the result is true for paths that are NOT absolute.
+	 * The caller relies on this: a true result means the file is looked up in the
+	 * jar and extracted, a false result means the path is used as is.
+	 *
+	 * A plain URI check is not sufficient because a URI without a scheme is never
+	 * reported as absolute, so local absolute paths are checked separately.
+	 *
+	 * @param path
+	 *            in format of URI or local OS path
+	 * @return false if the path has a URI scheme or is an absolute local path;
+	 *         true if it is relative or cannot be parsed as a URI
+	 */
+	private static boolean isPathAbsolute(String path) {
+		URI parsed;
+		try {
+			parsed = new URI(path);
+		} catch (URISyntaxException e) {
+			return true; //path is not a URI, so it should be extracted
+		}
+
+		if (parsed.getScheme() != null) {
+			return false; //a URI with a scheme already points somewhere definite
+		}
+
+		//no scheme: relative unless it is a local absolute path
+		return !Paths.get(path).isAbsolute();
+	}
+
+	/**
+	 * Given an inputstream, outputs a file to the given path. Missing parent
+	 * directories are created and an existing file is overwritten. The input
+	 * stream is read until its end but is not closed, closing it is left to the
+	 * caller.
+	 *
+	 * @param fileToExtract
+	 *            stream with the content to write
+	 * @param pathToExtractTo
+	 *            path of the file to create
+	 * @return true if the whole stream was written, false if an argument is null
+	 *         or an I/O error occurred
+	 */
+	private static boolean extractFileTo(InputStream fileToExtract, String pathToExtractTo) {
+		if (fileToExtract == null || pathToExtractTo == null) {
+			return false;
+		}
+
+		//A path without a parent (e.g. just a file name) has no directory to create
+		File parentDirectory = new File(pathToExtractTo).getParentFile();
+		if (parentDirectory != null) {
+			parentDirectory.mkdirs();
+		}
+
+		try (FileOutputStream os = new FileOutputStream(pathToExtractTo)) {
+			//Copy in chunks instead of one byte per read/write call
+			byte[] buffer = new byte[8192];
+			int read;
+			while ((read = fileToExtract.read(buffer)) != -1) {
+				os.write(buffer, 0, read);
+			}
+			return true;
+		} catch (IOException e) {
+			return false; //Covers failures while opening, writing and closing the file
+		}
+	}
+}
diff --git a/...lipse.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavadocContentAccess2.java b/...lipse.jdt.ls.core/src/org/eclipse/jdt/ls/core/internal/javadoc/JavadocContentAccess2.java
@@ -1461,6 +1461,10 @@ private void handleContentElements(List<? extends ASTNode> nodes, boolean skipLe
 			previousNode = child;
 			if (child instanceof TextElement) {
 				String text = ((TextElement) child).getText();
+				if (JavaDocHTMLPathHandler.isHTMLTag(text)) {
+					text = JavaDocHTMLPathHandler.getValidatedHTMLSrcAttribute((TextElement) child, fElement);
+				}
+
 				if (skipLeadingWhitespace) {
 					text = text.replaceFirst("^\\s", ""); //$NON-NLS-1$ //$NON-NLS-2$
 				}