From 72742b5cab7746da7c206941cf328cd7981cd53e Mon Sep 17 00:00:00 2001 From: Arnaud Ringenbach Date: Tue, 15 Feb 2022 09:49:03 +0100 Subject: [PATCH 1/3] Fix HTML render for links containing Markdown formatting --- .../MarkdownToHTMLRenderer.swift | 109 ++++++++++- RiotTests/MarkdownToHTMLRendererTests.swift | 170 ++++++++++++++++++ changelog.d/5355.bugfix | 1 + 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 RiotTests/MarkdownToHTMLRendererTests.swift create mode 100644 changelog.d/5355.bugfix diff --git a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift index 820928fb20..ca70be749a 100644 --- a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift +++ b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift @@ -36,7 +36,14 @@ public class MarkdownToHTMLRenderer: NSObject { extension MarkdownToHTMLRenderer: MarkdownToHTMLRendererProtocol { public func renderToHTML(markdown: String) -> String? { - return try? Down(markdownString: markdown).toHTML(options) + do { + let ast = try DownASTRenderer.stringToAST(markdown, options: options) + ast.repairLinks() + return try DownHTMLRenderer.astToHTML(ast, options: options) + } catch { + MXLog.debug("[MarkdownToHTMLRenderer] renderToHTML failed with string: \(markdown)") + return nil + } } } @@ -50,3 +57,103 @@ public class MarkdownToHTMLRendererHardBreaks: MarkdownToHTMLRenderer { } } + +// MARK: - AST-handling private extensions +private extension CMarkNode { + /// Formatting symbol associated with given note type + /// Note: this is only defined for node types that are handled in repairLinks + var formattingSymbol: String { + switch self.type { + case CMARK_NODE_EMPH: + return "_" + case CMARK_NODE_STRONG: + return "__" + default: + return "" + } + } + + /// Repairs links that were broken down by markdown formatting. + /// Should be used on the first node of libcmark's AST + /// (e.g. the object returned by DownASTRenderer.stringToAST). + func repairLinks() { + let iterator = cmark_iter_new(self) + var text = "" + var isInParagraph = false + var previousNode: CMarkNode? + var shouldUnlinkFormattingMode = false + var event: cmark_event_type? + while event != CMARK_EVENT_DONE { + event = cmark_iter_next(iterator) + + guard let node = cmark_iter_get_node(iterator) else { return } + + if node.type == CMARK_NODE_PARAGRAPH { + if event == CMARK_EVENT_ENTER { + isInParagraph = true + } else { + isInParagraph = false + text = "" + } + } + + if isInParagraph { + switch node.type { + case CMARK_NODE_SOFTBREAK, + CMARK_NODE_LINEBREAK: + text = "" + case CMARK_NODE_TEXT: + if let literal = node.literal { + text += literal + // Reset text if it ends up with a whitespace. + if text.last?.isWhitespace == true { + text = "" + } + // Only the last part could be a link conflicting with next node. + text = String(text.split(separator: " ").last ?? "") + } + case CMARK_NODE_EMPH where previousNode?.type == CMARK_NODE_TEXT, + CMARK_NODE_STRONG where previousNode?.type == CMARK_NODE_TEXT: + if event == CMARK_EVENT_ENTER { + if !text.containedUrls.isEmpty, + let childLiteral = node.pointee.first_child.literal { + // If current text is a link, the formatted text is reverted back to a + // plain text as a part of the link. + let symbol = node.formattingSymbol + let nonFormattedText = "\(symbol)\(childLiteral)\(symbol)" + let replacementTextNode = cmark_node_new(CMARK_NODE_TEXT) + cmark_node_set_literal(replacementTextNode, nonFormattedText) + cmark_node_insert_after(previousNode, replacementTextNode) + cmark_node_set_literal(node.pointee.first_child, "") + let newIterator = cmark_iter_new(node) + _ = cmark_iter_next(newIterator) + cmark_node_unlink(node) + let nextNode = cmark_iter_get_node(newIterator) + cmark_node_insert_after(previousNode, nextNode) + shouldUnlinkFormattingMode = true + } + } else { + if shouldUnlinkFormattingMode { + cmark_node_unlink(node) + shouldUnlinkFormattingMode = false + } + } + default: + break + } + } + previousNode = node + } + } +} + +private extension String { + /// Returns array of URLs detected inside the String. + var containedUrls: [NSTextCheckingResult] { + guard let detector = try? NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue) else { + return [] + } + + return detector.matches(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count)) + } +} diff --git a/RiotTests/MarkdownToHTMLRendererTests.swift b/RiotTests/MarkdownToHTMLRendererTests.swift new file mode 100644 index 0000000000..496308b4aa --- /dev/null +++ b/RiotTests/MarkdownToHTMLRendererTests.swift @@ -0,0 +1,170 @@ +// +// Copyright 2022 New Vector Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import XCTest + +@testable import Riot + +final class MarkdownToHTMLRendererTests: XCTestCase { + // MARK: - Tests + /// Test autolinks HTML render. + func testRenderAutolinks() { + let input = [ + "Test1:", + "<#_foonetic_xkcd:matrix.org>", + "", + "", + "<#_foonetic_xkcd:matrix.org>", + "", + "Test1A:", + "<#_foonetic_xkcd:matrix.org>", + "", + "", + "<#_foonetic_xkcd:matrix.org>", + "", + "Test2:", + "", + "", + "", + "Test3:", + "", + "", + ].joined(separator: "\n") + let expectedOutput = [ + "

Test1:\n<#_foonetic_xkcd:matrix.org>\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n<#_foonetic_xkcd:matrix.org>

", + "

Test1A:\n<#_foonetic_xkcd:matrix.org>\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n<#_foonetic_xkcd:matrix.org>

", + "

Test2:\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg

", + "

Test3:\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org

", + "", + ].joined(separator: "\n") + testRenderHTML(input: input, expectedOutput: expectedOutput) + } + + /// Test links with markdown formatting conflict. + func testRenderRepairedLinks() { + let input = [ + "Test1:", + "#_foonetic_xkcd:matrix.org", + "http://google.com/_thing_", + "https://matrix.org/_matrix/client/foo/123_", + "#_foonetic_xkcd:matrix.org", + "", + "Test1A:", + "#_foonetic_xkcd:matrix.org", + "http://google.com/_thing_", + "https://matrix.org/_matrix/client/foo/123_", + "#_foonetic_xkcd:matrix.org", + "", + "Test2:", + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg", + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg", + "", + "Test3:", + "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org", + "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org", + ].joined(separator: "\n") + let expectedOutput = [ + "

Test1:\n#_foonetic_xkcd:matrix.org\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n#_foonetic_xkcd:matrix.org

", + "

Test1A:\n#_foonetic_xkcd:matrix.org\nhttp://google.com/_thing_\nhttps://matrix.org/_matrix/client/foo/123_\n#_foonetic_xkcd:matrix.org

", + "

Test2:\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg\nhttp://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg

", + "

Test3:\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org\nhttps://riot.im/app/#/room/#_foonetic_xkcd:matrix.org

", + "", + ].joined(separator: "\n") + testRenderHTML(input: input, expectedOutput: expectedOutput) + } + + /// Test links with markdown strong formatting conflict. + func testRenderRepairedLinksWithStrongFormatting() { + let input = "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py" + + " " + + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py" + let expectedOutput = "

https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py" + + " " + + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py

" + + "\n" + testRenderHTML(input: input, expectedOutput: expectedOutput) + } + + /// Test links with markdown formatting conflict and actual markdown in between. + func testRenderRepairedLinksWithMarkdownInBetween() { + let input = "__Some bold text__ " + + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py" + + " _some emphased text_ " + + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg" + let expectedOutput = "

Some bold text " + + "https://github.com/matrix-org/synapse/blob/develop/synapse/module_api/__init__.py" + + " some emphased text " + + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg

" + + "\n" + testRenderHTML(input: input, expectedOutput: expectedOutput) + } + + /// Test links inside codeblocks. + func testRenderLinksInCodeblock() { + let input = "```" + + [ + "Test1:", + "#_foonetic_xkcd:matrix.org", + "http://google.com/_thing_", + "https://matrix.org/_matrix/client/foo/123_", + "#_foonetic_xkcd:matrix.org", + "", + "Test1A:", + "#_foonetic_xkcd:matrix.org", + "http://google.com/_thing_", + "https://matrix.org/_matrix/client/foo/123_", + "#_foonetic_xkcd:matrix.org", + "", + "Test2:", + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg", + "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg", + "", + "Test3:", + "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org", + "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org", + ].joined(separator: "\n") + + "```" + let expectedOutput = [ + "
#_foonetic_xkcd:matrix.org",
+            "http://google.com/_thing_",
+            "https://matrix.org/_matrix/client/foo/123_",
+            "#_foonetic_xkcd:matrix.org",
+            "",
+            "Test1A:",
+            "#_foonetic_xkcd:matrix.org",
+            "http://google.com/_thing_",
+            "https://matrix.org/_matrix/client/foo/123_",
+            "#_foonetic_xkcd:matrix.org",
+            "",
+            "Test2:",
+            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
+            "http://domain.xyz/foo/bar-_stuff-like-this_-in-it.jpg",
+            "",
+            "Test3:",
+            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org",
+            "https://riot.im/app/#/room/#_foonetic_xkcd:matrix.org```",
+            "
", + "", + ].joined(separator: "\n") + testRenderHTML(input: input, expectedOutput: expectedOutput) + } + + // MARK: - Private + private func testRenderHTML(input: String, expectedOutput: String) { + let output = MarkdownToHTMLRenderer().renderToHTML(markdown: input) + XCTAssertEqual(output, expectedOutput) + } +} diff --git a/changelog.d/5355.bugfix b/changelog.d/5355.bugfix new file mode 100644 index 0000000000..131d61af56 --- /dev/null +++ b/changelog.d/5355.bugfix @@ -0,0 +1 @@ +Markdown/HTML: Fix HTTP links containing Markdown formatting From 72f5f17507f41e7458cad26d1acff4c2366d53a5 Mon Sep 17 00:00:00 2001 From: Arnaud Ringenbach Date: Tue, 15 Feb 2022 17:38:11 +0100 Subject: [PATCH 2/3] Log renderToHTML fail as an error --- .../MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift index ca70be749a..d8914e8076 100644 --- a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift +++ b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift @@ -41,7 +41,7 @@ extension MarkdownToHTMLRenderer: MarkdownToHTMLRendererProtocol { ast.repairLinks() return try DownHTMLRenderer.astToHTML(ast, options: options) } catch { - MXLog.debug("[MarkdownToHTMLRenderer] renderToHTML failed with string: \(markdown)") + MXLog.error("[MarkdownToHTMLRenderer] renderToHTML failed with string: \(markdown)") return nil } } From aaaba7d3af582bd6ac8f0f176d411f7c4971aef4 Mon Sep 17 00:00:00 2001 From: Arnaud Ringenbach Date: Thu, 17 Feb 2022 16:12:00 +0100 Subject: [PATCH 3/3] Fix AST nodes memory handling --- .../EventFormatter/MarkdownToHTMLRenderer.swift | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift index d8914e8076..392e900dda 100644 --- a/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift +++ b/Riot/Modules/MatrixKit/Utils/EventFormatter/MarkdownToHTMLRenderer.swift @@ -38,6 +38,9 @@ extension MarkdownToHTMLRenderer: MarkdownToHTMLRendererProtocol { public func renderToHTML(markdown: String) -> String? { do { let ast = try DownASTRenderer.stringToAST(markdown, options: options) + defer { + cmark_node_free(ast) + } ast.repairLinks() return try DownHTMLRenderer.astToHTML(ast, options: options) } catch { @@ -81,6 +84,7 @@ private extension CMarkNode { var text = "" var isInParagraph = false var previousNode: CMarkNode? + var orphanNodes: [CMarkNode] = [] var shouldUnlinkFormattingMode = false var event: cmark_event_type? while event != CMARK_EVENT_DONE { @@ -124,10 +128,14 @@ private extension CMarkNode { let replacementTextNode = cmark_node_new(CMARK_NODE_TEXT) cmark_node_set_literal(replacementTextNode, nonFormattedText) cmark_node_insert_after(previousNode, replacementTextNode) + // Set child literal to empty string so we dont read it. + // This avoids having to re-create the main + // iterator in the middle of the process. cmark_node_set_literal(node.pointee.first_child, "") let newIterator = cmark_iter_new(node) _ = cmark_iter_next(newIterator) cmark_node_unlink(node) + orphanNodes.append(node) let nextNode = cmark_iter_get_node(newIterator) cmark_node_insert_after(previousNode, nextNode) shouldUnlinkFormattingMode = true @@ -135,6 +143,7 @@ private extension CMarkNode { } else { if shouldUnlinkFormattingMode { cmark_node_unlink(node) + orphanNodes.append(node) shouldUnlinkFormattingMode = false } } @@ -144,6 +153,13 @@ private extension CMarkNode { } previousNode = node } + + // Free all nodes removed from the AST. + // This is done as a last step to avoid messing + // up with the main itertor. + for orphanNode in orphanNodes { + cmark_node_free(orphanNode) + } } }