Skip to content

Commit

Permalink
Working on javadoc image fix
Browse files Browse the repository at this point in the history
Image extraction works, regular paths work too.

No tests.

Need to figure out how to get path of source jar.

Fixes eclipse-jdtls#1007

Signed-off-by: Nikolas Komonen <[email protected]>
  • Loading branch information
NikolasKomonen committed Aug 1, 2019
1 parent d15cf6a commit 1a04501
Show file tree
Hide file tree
Showing 3 changed files with 317 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public class JavaDoc2MarkdownConverter extends AbstractJavaDocConverter {
Whitelist w = (Whitelist) whitelistField.get(c);

w.addProtocols("a", "href", "file", "jdt");

//Allow all types of paths, not only URIs with the http or https protocol
w.removeProtocols("img", "src", "http", "https");
} catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
JavaLanguageServerPlugin.logException("Unable to modify jsoup to include file and jdt protocols", e);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
/*******************************************************************************
* Copyright (c) 2019 Red Hat Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Red Hat Inc. - initial API and implementation
*******************************************************************************/
package org.eclipse.jdt.ls.core.internal.javadoc;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.util.jar.JarFile;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;

import org.eclipse.core.internal.runtime.InternalPlatform;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Platform;
import org.eclipse.jdt.core.IJavaElement;
import org.eclipse.jdt.core.JavaModelException;
import org.eclipse.jdt.core.dom.TextElement;
import org.eclipse.jdt.internal.core.PackageFragment;
import org.eclipse.jdt.ls.core.internal.IConstants;


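/**
 * Static helpers for HTML tags found in Javadoc text whose <code>src</code>
 * attribute refers to a file inside a jar: detects the supported tags, locates
 * the <code>src</code> value, and rewrites it to point at a copy of the file
 * extracted from the jar.
 *
 * @author Nikolas Komonen - [email protected]
 *
 */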
public class JavaDocHTMLPathHandler {

/**
 * Names of the HTML tags whose <code>src</code> attribute is handled by this
 * class.
 */
public static final String[] tags = { "img" };

/**
 * Returns true if the text is a single HTML tag (it starts with '&lt;' and ends
 * with '&gt;') whose name is one of the names in
 * {@link JavaDocHTMLPathHandler#tags}.
 *
 * The tag name is compared case-insensitively and ends at the first whitespace
 * character or '/', so <code>&lt;IMG src="a.png"&gt;</code> and
 * <code>&lt;img/&gt;</code> are both recognized. Closing tags such as
 * <code>&lt;/img&gt;</code> are not.
 *
 * @param text
 *            the text to inspect, may be null
 * @return true if the text is a tag listed in {@link JavaDocHTMLPathHandler#tags},
 *         false otherwise (including for null or empty text)
 */
public static boolean isHTMLTag(String text) {
    // Both delimiters are required: the name scan below relies on the trailing '>'
    if (text == null || !text.startsWith("<") || !text.endsWith(">")) {
        return false;
    }

    // Scan for the end of the tag name; the last character ('>') is excluded
    int lastIndex = text.length() - 1;
    int nameEnd = 1;
    while (nameEnd < lastIndex) {
        char c = text.charAt(nameEnd);
        if (Character.isWhitespace(c) || c == '/') {
            break;
        }
        nameEnd++;
    }

    if (nameEnd <= 1) {
        return false; // "<>", "< ...>" or a closing tag such as "</img>"
    }

    String tagName = text.substring(1, nameEnd);
    for (String tag : tags) {
        if (tag.equalsIgnoreCase(tagName)) {
            return true;
        }
    }
    return false;
}

/**
 * Given a {@link TextElement} that represents an HTML tag with a 'src'
 * attribute, extracts the referenced file from a jar (if it is not already
 * extracted and up to date) and returns the tag text with the 'src' value
 * replaced by a <code>file:</code> URI of the extracted copy.
 *
 * The file is looked up first in the javadoc jar of the enclosing package
 * fragment and then in the jar the fragment itself lives in. The copy is written
 * below the plugin state location, in
 * <code>extracted-jar-images/&lt;jar name&gt;/&lt;file name&gt;</code>.
 *
 * This is best effort: whenever the 'src' value does not need rewriting, or the
 * file cannot be located or extracted, the original text is returned unchanged.
 *
 * NOTE(review): the output location uses only the jar name and the file name, so
 * two files with the same name in different packages of one jar share a location.
 *
 * @param child
 *            the text element holding the HTML tag
 * @param fElement
 *            the java element whose Javadoc contains the tag; its ancestors are
 *            searched for the enclosing {@link PackageFragment}
 * @return the tag text with the rewritten 'src' value, or the original text
 */
public static String getValidatedHTMLSrcAttribute(TextElement child, IJavaElement fElement) {

    String text = child.getText();
    int[] offsets = extractSourcePathFromHTMLTag(text);
    // No src attribute, or no closing quote (end offset lies before the start offset)
    if (offsets == null || offsets[1] < offsets[0]) {
        return text;
    }
    String srcPath = text.substring(offsets[0], offsets[1]);

    // Despite its name, isPathAbsolute returns true for scheme-less relative
    // paths, i.e. exactly the paths that have to be resolved against a jar.
    if (!isPathAbsolute(srcPath)) {
        return text; //Current path is good as is.
    }

    String fileName = new File(srcPath).getName();
    if (fileName.isEmpty()) {
        return text;
    }

    PackageFragment jarFragment = findEnclosingPackageFragment(fElement);
    if (jarFragment == null) {
        return text;
    }

    // Jar entry names always use '/', independent of the OS file separator
    String packageName = jarFragment.getElementName();
    String entryName = packageName.isEmpty() ? srcPath : packageName.replace('.', '/') + '/' + srcPath;

    try {
        //Attempt to get file from javadoc jar
        String javadocJarPath = null;
        URL javadocJarBaseLocationURL = JavaDocLocations.getJavadocBaseLocation(jarFragment);
        if (javadocJarBaseLocationURL != null) {
            javadocJarPath = getJarPathFromURI(javadocJarBaseLocationURL.toURI());
        }
        JarFile located = openJarContaining(javadocJarPath, entryName);

        //No file was in the javadoc jar, try the jar of the fragment itself
        if (located == null) {
            located = openJarContaining(trimToJarPath(jarFragment.getPath().toOSString()), entryName);
        }
        if (located == null) {
            return text; //File from source path could not be located in either jar
        }

        try (JarFile jar = located) {
            IPath stateLocationPath = InternalPlatform.getDefault().getStateLocation(Platform.getBundle(IConstants.PLUGIN_ID));

            String jarRootName = jarFragment.getPackageFragmentRoot().getElementName();
            if (jarRootName.endsWith(".jar")) {
                jarRootName = jarRootName.substring(0, jarRootName.length() - 4);
            }

            File imagesRoot = new File(stateLocationPath.toOSString(), "extracted-jar-images");
            File outputFile = new File(new File(imagesRoot, jarRootName), fileName);

            if (needsExtraction(outputFile, jar.getName())) {
                ZipEntry entry = jar.getEntry(entryName);
                try (InputStream is = jar.getInputStream(entry)) {
                    if (!extractFileTo(is, outputFile.getPath())) {
                        return text; //Do not point the tag at a file that was not written
                    }
                }
            }

            //Insert new path into text as a properly encoded file URI
            return text.substring(0, offsets[0]) + outputFile.toPath().toUri().toString() + text.substring(offsets[1]);
        }
    } catch (JavaModelException | IOException | URISyntaxException e) {
        // Best effort: leave the tag untouched when the file cannot be resolved
        return text;
    }
}

/**
 * Walks up the parents of the given element until a {@link PackageFragment} is
 * found.
 *
 * @param element
 *            the element to start from (its parent is the first candidate)
 * @return the enclosing package fragment, or null if there is none
 */
private static PackageFragment findEnclosingPackageFragment(IJavaElement element) {
    IJavaElement current = element == null ? null : element.getParent();
    while (current != null && !(current instanceof PackageFragment)) {
        current = current.getParent();
    }
    return (PackageFragment) current;
}

/**
 * Cuts the given path after its last ".jar".
 *
 * @param path
 *            a path that may continue past the jar file name
 * @return the path up to and including the last ".jar", or null if the path is
 *         null or contains no ".jar"
 */
private static String trimToJarPath(String path) {
    if (path == null) {
        return null;
    }
    int jarSuffix = path.lastIndexOf(".jar");
    return jarSuffix == -1 ? null : path.substring(0, jarSuffix + 4);
}

/**
 * Opens the jar at the given path if it contains the given entry.
 *
 * @param jarPath
 *            path of the jar, may be null
 * @param entryName
 *            name of the entry to look for
 * @return the open jar (the caller must close it), or null if the path is null,
 *         the jar cannot be opened, or it has no such entry
 */
private static JarFile openJarContaining(String jarPath, String entryName) {
    if (jarPath == null) {
        return null;
    }
    try {
        JarFile jar = new JarFile(jarPath);
        if (jar.getEntry(entryName) != null) {
            return jar;
        }
        jar.close();
    } catch (IOException e) {
        // A missing or unreadable jar is treated the same as a jar without the entry
    }
    return null;
}

/**
 * Decides whether the file has to be (re-)extracted: it does when the output
 * file does not exist or the jar was modified after the output file was written.
 *
 * Last-modified times are compared because overwriting a file updates its
 * last-modified time, while its creation time may stay the same.
 *
 * @param outputFile
 *            the extracted copy
 * @param jarPath
 *            path of the jar the copy comes from
 * @return true if the copy is missing or older than the jar
 * @throws IOException
 *             if a timestamp cannot be read
 */
private static boolean needsExtraction(File outputFile, String jarPath) throws IOException {
    if (!outputFile.exists()) {
        return true;
    }
    FileTime extractedAt = Files.getLastModifiedTime(outputFile.toPath());
    FileTime jarModifiedAt = Files.getLastModifiedTime(Paths.get(jarPath));
    return jarModifiedAt.compareTo(extractedAt) > 0;
}

/**
 * Matches the start of a src attribute up to and including its opening quote.
 * Case-insensitive; the lookbehind rejects names that merely end in "src",
 * such as "data-src".
 */
private static final Pattern SRC_ATTRIBUTE_START = Pattern.compile("(?i)(?<![\\w-])src\\s*=\\s*['\"]");

/**
 * Gets the position between the quotes of a src attribute. Will look for
 * something similar to (src="...") and locate the path inside. Both single and
 * double quotes are accepted; the value ends at the next occurrence of the quote
 * character that opened it.
 *
 * Start offset is at offsets[0], after the start quotation. End offset is at
 * offsets[1], before the end quotation.
 *
 * Offsets are at '|':
 *
 * src="|nikolas/wrote/this|"
 *
 * If the src attribute cannot be found, or its value has no closing quote, null
 * is returned.
 *
 * @param text
 *            the text of the HTML tag, may be null
 * @return int[] with start and end offset of src attribute value, else null.
 */
public static int[] extractSourcePathFromHTMLTag(String text) {
    if (text == null) {
        return null;
    }
    Matcher m = SRC_ATTRIBUTE_START.matcher(text);
    if (!m.find()) {
        return null;
    }
    int srcStartQuote = m.end();
    char quote = text.charAt(srcStartQuote - 1);
    int srcEndQuote = text.indexOf(quote, srcStartQuote);
    if (srcEndQuote == -1) {
        return null; // unterminated value: there is no usable end offset
    }
    return new int[] { srcStartQuote, srcEndQuote };
}

/**
 * Matches a scheme prefix at the start of a scheme-specific part, e.g. the
 * "file:" left over from "jar:file:/...". At least two characters are required
 * so that a Windows drive letter ("C:") is not mistaken for a scheme.
 */
private static final Pattern NESTED_SCHEME_PREFIX = Pattern.compile("^[A-Za-z][A-Za-z0-9+.-]+:");

/**
 * Extracts the path of the jar file a URI refers to.
 *
 * Works on the scheme-specific part of the URI: a leading nested scheme (as in
 * <code>jar:file:/path/doc.jar!/</code>) is removed, anything from the jar entry
 * separator "!/" onwards is dropped, and the result is cut after the last
 * ".jar".
 *
 * @param uri
 *            the URI to inspect, may be null
 * @return the path up to and including ".jar", or null if the URI is null or
 *         does not contain ".jar"
 */
public static String getJarPathFromURI(URI uri) {
    if (uri == null) {
        return null;
    }
    String path = uri.getSchemeSpecificPart();
    if (path == null) {
        return null;
    }

    // Strip only a real nested scheme; a colon elsewhere (drive letter) is part of the path
    path = NESTED_SCHEME_PREFIX.matcher(path).replaceFirst("");

    // Everything after "!/" names an entry inside the jar, not the jar itself
    int entrySeparator = path.indexOf("!/");
    if (entrySeparator != -1) {
        path = path.substring(0, entrySeparator);
    }

    //clean up/verify the jar path
    int actualJarIndex = path.lastIndexOf(".jar");
    if (actualJarIndex == -1) {
        return null;
    }
    return path.substring(0, actualJarIndex + 4);
}

/**
 * Decides whether a 'src' path has to be resolved against a jar. The path can be
 * in the format of a URI or a local OS specific path.
 *
 * NOTE: the return value is the opposite of what the method name suggests. The
 * existing caller relies on this, so the contract is kept as is:
 * <ul>
 * <li>false - the path has a URI scheme, or it is an absolute local path; it is
 * usable as is</li>
 * <li>true - the path is relative without a scheme, or it cannot be parsed as a
 * URI or as a local path</li>
 * </ul>
 *
 * The local path check is needed because a URI without a scheme is never
 * absolute, so scheme-less paths have to be checked as OS paths as well.
 *
 * @param path
 *            in format of URI or local OS path, may be null
 * @return true if the path is relative or unparsable, false if it has a scheme,
 *         is an absolute local path, or is null
 */
private static boolean isPathAbsolute(String path) {
    if (path == null) {
        return false; // nothing to resolve
    }
    try {
        URI uri = new URI(path);
        if (uri.getScheme() != null) {
            return false; // has a scheme, already resolvable
        }
        // local absolute path without URI scheme -> false, relative path -> true
        return !Paths.get(path).isAbsolute();
    } catch (URISyntaxException | java.nio.file.InvalidPathException e) {
        return true; //path is not a URI or not a valid local path, so it should be extracted
    }
}

/**
 * Given an inputstream, writes its remaining content to a file at the given
 * path. Missing parent directories are created and an existing file is
 * overwritten. The input stream is not closed by this method.
 *
 * If copying fails after the output file was opened, the incomplete file is
 * deleted so that it is not mistaken for a finished extraction later.
 *
 * @param fileToExtract
 *            the stream to read from
 * @param pathToExtractTo
 *            path of the file to write
 * @return true if the whole stream was written, false if an argument is null,
 *         the parent directory could not be created, or an I/O error occurred
 */
private static boolean extractFileTo(InputStream fileToExtract, String pathToExtractTo) {
    if (fileToExtract == null || pathToExtractTo == null) {
        return false;
    }

    File target = new File(pathToExtractTo);
    File parent = target.getParentFile();
    // mkdirs() also returns false when the directory already exists, hence the second check
    if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
        return false;
    }

    FileOutputStream opened;
    try {
        opened = new FileOutputStream(target);
    } catch (IOException e) {
        return false; // nothing was written, so any existing file is left alone
    }

    try (FileOutputStream os = opened) {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = fileToExtract.read(buffer)) != -1) {
            os.write(buffer, 0, read);
        }
    } catch (IOException e) {
        target.delete(); // the file was truncated on open and is now incomplete
        return false;
    }
    return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,10 @@ private void handleContentElements(List<? extends ASTNode> nodes, boolean skipLe
previousNode = child;
if (child instanceof TextElement) {
String text = ((TextElement) child).getText();
if (JavaDocHTMLPathHandler.isHTMLTag(text)) {
text = JavaDocHTMLPathHandler.getValidatedHTMLSrcAttribute((TextElement) child, fElement);
}

if (skipLeadingWhitespace) {
text = text.replaceFirst("^\\s", ""); //$NON-NLS-1$ //$NON-NLS-2$
}
Expand Down

0 comments on commit 1a04501

Please sign in to comment.