Skip to content

Commit

Permalink
Merge pull request #467 from support-project/feature/issue172_pdf_to_image
Browse files Browse the repository at this point in the history

#172 Change the library "com-sun-pdfview" to "pdfbox" for pdf to png
  • Loading branch information
koda-masaru authored Sep 13, 2016
2 parents 93eb3cd + 6c9e7e0 commit 36f6247
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import java.io.IOException;

import org.support.project.common.util.StringUtils;
import org.support.project.knowledge.parser.impl.PdfSlideShowParser;
import org.support.project.knowledge.parser.impl.PdfSlideShowParserOnPdfbox;
import org.support.project.knowledge.parser.impl.PptxSlideShowParser;

public class SlideShowParserFactory {
Expand All @@ -19,7 +19,7 @@ public static SlideShowParser getParser(String filename) throws IOException {
if (extension.toLowerCase().equals(".pptx")) {
return new PptxSlideShowParser();
} else if (extension.toLowerCase().equals(".pdf")) {
return new PdfSlideShowParser();
return new PdfSlideShowParserOnPdfbox();
} else {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.support.project.knowledge.parser.impl;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.ImageIOUtil;
import org.support.project.common.exception.ParseException;
import org.support.project.knowledge.parser.SlideShowParser;

/**
 * {@link SlideShowParser} that converts every page of a PDF file into a PNG image
 * using the Apache PDFBox 1.8 API.
 *
 * <p>Pages are written to the output directory as {@code 1.png}, {@code 2.png}, ...
 * in document order.
 *
 * <p>Migration note: with PDFBox 2.0 the equivalent is {@code PDDocument.load(inputFile)}
 * plus {@code new PDFRenderer(document).renderImageWithDPI(pageIndex, 300, ImageType.RGB)}
 * (page index is 0-based there, so the file name must use {@code pageIndex + 1}).
 */
public class PdfSlideShowParserOnPdfbox implements SlideShowParser {

    /** Resolution, in dots per inch, used both to render a page and as the DPI passed to the image writer. */
    private static final int DPI = 300;

    /**
     * Renders each page of {@code inputFile} to {@code <outputDir>/<pageNumber>.png}.
     *
     * @param inputFile the PDF file to read
     * @param outputDir the directory that receives the generated PNG files
     * @throws ParseException if loading, rendering, writing or closing the document
     *         fails with an {@link IOException}
     */
    @Override
    public void parse(File inputFile, File outputDir) throws ParseException {
        try {
            PDDocument document = PDDocument.loadNonSeq(inputFile, null);
            try {
                // getAllPages() is declared with a raw List in PDFBox 1.8; its elements are PDPage.
                @SuppressWarnings("unchecked")
                List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
                int page = 0;
                for (PDPage pdPage : pdPages) {
                    ++page; // output file names are 1-based
                    BufferedImage bim = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, DPI);
                    // The suffix of the file name selects the image format.
                    ImageIOUtil.writeImage(bim, outputDir.getAbsolutePath() + "/" + page + ".png", DPI);
                }
            } finally {
                // Always release the document, even when rendering or writing a page fails.
                document.close();
            }
        } catch (IOException e) {
            throw new ParseException(e);
        }
    }

}

Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
import org.support.project.common.exception.ParseException;
import org.support.project.common.util.FileUtil;
import org.support.project.knowledge.config.AppConfig;
import org.support.project.knowledge.parser.impl.PdfSlideShowParser;
import org.support.project.knowledge.parser.impl.PdfSlideShowParserOnPdfbox;

public class PdfSlideShowParserTest {

public static final String SAMPLE = "sample2";
public static final String SAMPLE = "sample4";

public static void main(String[] args) throws FileNotFoundException, IOException, ParseException {
File tempDir = new File(AppConfig.get().getTmpPath());
Expand All @@ -24,7 +24,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException
FileUtil.copy(PdfSlideShowParserTest.class.getResourceAsStream("/org/support/project/knowledge/paeser/" + SAMPLE + ".pdf"),
new FileOutputStream(sample));

PdfSlideShowParser parser = new PdfSlideShowParser();
SlideShowParser parser = new PdfSlideShowParserOnPdfbox();
parser.parse(sample, output);
}

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 36f6247

Please sign in to comment.