diff --git a/CHANGES.txt b/CHANGES.txt index 2005548262..e3f9d25fb3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +Release 1.23 - ??/??/??? + + * NOTE: tika-server no longer hard-codes the HtmlParser to handle + XML files (TIKA-2910). Users must now configure that behavior + via a tika-config.xml file. + Release 1.22 - 07/29/2019 * NOTE: Known regression: PDFBOX-4587 -- PDF passwords with codepoints diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java index 61af2f21fd..841920c7dc 100644 --- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java +++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java @@ -37,7 +37,6 @@ import org.apache.tika.parser.ParserDecorator; import org.apache.tika.parser.PasswordProvider; import org.apache.tika.parser.html.BoilerpipeContentHandler; -import org.apache.tika.parser.html.HtmlParser; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.pdf.PDFParserConfig; import org.apache.tika.sax.BodyContentHandler; @@ -119,7 +118,7 @@ public static Parser createParser() { final Parser parser = new AutoDetectParser(tikaConfig); Map parsers = ((AutoDetectParser)parser).getParsers(); - parsers.put(MediaType.APPLICATION_XML, new HtmlParser()); + ((AutoDetectParser)parser).setParsers(parsers); ((AutoDetectParser)parser).setFallback(new Parser() {