Skip to content

Commit

Permalink
prevent character autosubstitution in URIs: #808
Browse files Browse the repository at this point in the history
  • Loading branch information
opoudjis committed Aug 9, 2023
1 parent 6e69c03 commit ee7785f
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 15 deletions.
3 changes: 2 additions & 1 deletion lib/metanorma/standoc/converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
require_relative "utils"
require_relative "cleanup"
require_relative "reqt"
require_relative "./macros"
require_relative "macros"

module Metanorma
module Standoc
Expand All @@ -22,6 +22,7 @@ class Converter
Asciidoctor::Extensions.register do
preprocessor Metanorma::Standoc::EmbedIncludeProcessor
preprocessor Metanorma::Standoc::NamedEscapePreprocessor
preprocessor Metanorma::Standoc::LinkProtectPreprocessor
preprocessor Metanorma::Standoc::Datamodel::AttributesTablePreprocessor
preprocessor Metanorma::Standoc::Datamodel::DiagramPreprocessor
preprocessor Metanorma::Plugin::Datastruct::Json2TextPreprocessor
Expand Down
130 changes: 118 additions & 12 deletions lib/metanorma/standoc/macros.rb
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
require "uuidtools"
require "yaml"
require "csv"
require_relative "./macros_inline"
require_relative "./macros_plantuml"
require_relative "./macros_terms"
require_relative "./macros_form"
require_relative "./macros_note"
require_relative "./macros_embed"
require_relative "./datamodel/attributes_table_preprocessor"
require_relative "./datamodel/diagram_preprocessor"
require_relative "macros_inline"
require_relative "macros_plantuml"
require_relative "macros_terms"
require_relative "macros_form"
require_relative "macros_note"
require_relative "macros_embed"
require_relative "datamodel/attributes_table_preprocessor"
require_relative "datamodel/diagram_preprocessor"
require "metanorma-plugin-datastruct"
require "metanorma-plugin-glossarist"
require "metanorma-plugin-lutaml"
Expand All @@ -22,7 +22,7 @@ class PseudocodeBlockMacro < Asciidoctor::Extensions::BlockProcessor

# Converts the leading whitespace of a pseudocode line into non-breaking
# spaces so that indentation survives rendering.
#
# Each leading tab becomes four U+00A0 characters and each leading space
# becomes one U+00A0; the remainder of the line is returned unchanged.
#
# @param line [String] a single source line
# @return [String] the line with its indentation made non-breaking
def init_indent(line)
  # Named captures on a literal regexp with =~ define the locals
  # `prefix` (leading spaces/tabs) and `suffix` (the rest of the line).
  /^(?<prefix>[ \t]*)(?<suffix>.*)$/ =~ line
  # Tabs are expanded first; the previous duplicate tab substitution
  # (a regexp and a string variant back to back) was a no-op and is dropped.
  prefix = prefix.gsub("\t", "\u00a0\u00a0\u00a0\u00a0")
    .gsub(/ /, "\u00a0")
  prefix + suffix
end
Expand All @@ -31,9 +31,9 @@ def supply_br(lines)
ignore = false
lines.each_with_index do |l, i|
/^(--+|====+|\|===|\.\.\.\.+|\*\*\*\*+|\+\+\+\++|````+|____\+)$/
.match(l) && (ignore = !ignore)
next if l.empty? || l.match(/ \+$/) || /^\[.*\]$/.match?(l) || ignore
next if i == lines.size - 1 ||
.match(l) and (ignore = !ignore)
next if l.empty? || l.match(/ \+$/) || /^\[.*\]$/.match?(l) ||
ignore || i == lines.size - 1 ||
(i < lines.size - 1 && lines[i + 1].empty?)

lines[i] += " +"
Expand All @@ -60,5 +60,111 @@ def process(_document, reader)
::Asciidoctor::Reader.new lines
end
end

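# Refer to https://github.com/asciidoctor/asciidoctor/blob/main/lib/asciidoctor/substitutors.rb
# for the Asciidoctor link regular expressions that the ones below are adapted from.
# A Preprocessor is used instead of a TreeProcessor, because a TreeProcessor is
# still too close to inline expressions being processed on access (e.g. titles).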
# Preprocessor that wraps the URI part of inline links and link:/mailto:
# macros in ++...++ passthroughs, line by line, before the document is
# parsed, so that character substitutions are not applied to URIs.
# Lines in the document header, between ++++ delimiters, and inside
# delimited source/listing/literal/pass blocks are left untouched.
class LinkProtectPreprocessor < Asciidoctor::Extensions::Preprocessor
  # Builds the initial line-scanning state.
  #
  # @return [Hash] state with the keys
  #   :pass       => process as passthrough; true until the end of the
  #                  document header is hit
  #   :is_delim   => current line is a no-substitution block delimiter
  #   :pass_delim => current line is a passthrough delimiter
  #   :delimln    => delimiter line of the current block(s); the initial ""
  #                  makes the first blank line (end of doc header) close it
  def init
    { pass: true, is_delim: false, pass_delim: false, delimln: "" }
  end

  # Rewrites every line that is not in passthrough state and contains a
  # colon, protecting the URIs of inline links and link macros.
  #
  # @param _document [Object] unused
  # @param reader [#readlines] source of the document lines
  # @return [Asciidoctor::Reader] reader over the rewritten lines
  def process(_document, reader)
    status = init
    lines = reader.readlines.map do |line|
      status = pass_status(status, line.rstrip)
      next line if status[:pass] || !line.include?(":")

      inlinelinkmacro(inlinelink(line))
    end
    ::Asciidoctor::Reader.new lines
  end

  # Updates the scanning state for the next line. The :is_delim and
  # :pass_delim flags consulted here were computed from the previous line;
  # they are recomputed for the current line at the end.
  #
  # @param status [Hash] state as returned by #init (mutated and returned)
  # @param text [String] current line, right-stripped
  # @return [Hash] the updated state
  def pass_status(status, text)
    # ++++ toggles passthrough, but only outside of a tracked block
    if text == "++++" && !status[:delimln]
      status[:pass] = !status[:pass]
    end
    if status[:is_delim] && /^(-+|\*+|=+|_+)$/.match?(text)
      # block delimiter following a [source]/[listing]/[literal]/[pass] line
      status[:delimln] = text
      status[:pass] = true
    elsif status[:pass_delim]
      # NOTE(review): this branch does not set :pass, so the lines of an
      # undelimited [pass] paragraph are still rewritten -- confirm intended.
      status[:delimln] = "" # end of paragraph for paragraph with [pass]
    elsif status[:delimln] && text == status[:delimln]
      # closing delimiter (or blank line when :delimln is "")
      status[:pass] = false
      status[:delimln] = nil
    end
    status[:is_delim] = /^\[(source|listing|literal|pass)\b/.match?(text)
    status[:pass_delim] = /^\[(pass)\b/.match?(text)
    status
  end

  # Names of inline macros whose contents must not be rewritten,
  # joined as a regexp alternation.
  PASS_INLINE_MACROS = %w(pass pass-format identifier std-link stem)
    .join("|").freeze

  PASS_INLINE_MACRO_STR = <<~REGEX.freeze
    (
      \\b(?<!-) # word-separator, no hyphen
      (?: # don't capture these!
        (?:#{PASS_INLINE_MACROS}):[^\\s\\[]* | # macro name, :, second key. OR:
        span:uri \\b [^\\s\\[]* # span:uri, third key
      )
      \\[.*?(?<!\\\\)\\] # [ ... ] not preceded by \\
    )
  REGEX
  PASS_INLINE_MACRO_RX = /#{PASS_INLINE_MACRO_STR}/xo.freeze

  # Splits text around the protected inline macros, yields every other
  # segment to the block, and returns the segments in order.
  #
  # @param text [String]
  # @yieldparam segment [String] text outside of a protected macro
  # @yieldreturn [String] replacement for that segment
  # @return [Array<String>] macro segments verbatim, others as transformed
  def pass_inline_split(text)
    # the capture group in the regexp makes split keep the macros themselves
    text.split(PASS_INLINE_MACRO_RX).map do |segment|
      PASS_INLINE_MACRO_RX.match?(segment) ? segment : yield(segment)
    end
  end

  # InlineLinkRx = %r((^|link:|#{CG_BLANK}|&lt;|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m
  #
  InlineLinkRx = %r((^|(?<!-)\blink:(?!\+)|\p{Blank}|&lt;|[<>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)(?:(\[(|.*?[^\\])\])|([^\s\[\]<]*([^\s,.?!\[\]<\)])))))m.freeze

  # Protects bare and bracketed URLs (http, https, file, ftp, irc) in text.
  #
  # @param text [String]
  # @return [String] text with URL bodies wrapped in link:++...++[...]
  def inlinelink(text)
    return text unless text.include?("://")

    pass_inline_split(text) { |segment| inlinelink_escape(segment) }.join
  end

  # Rewrites each InlineLinkRx match as a link: macro whose target is
  # wrapped in ++...++. Bare URLs get an empty [] attribute list appended.
  #
  # @param text [String]
  # @return [String]
  def inlinelink_escape(text)
    text.gsub(InlineLinkRx) do
      m = Regexp.last_match
      prefix = m[1]
      scheme = m[2]
      bracket = m[4] # "[...]" if the URL is followed by one, else nil
      body, suffix = bracket.nil? ? [m[3] + m[6], "[]"] : [m[3], ""]
      case prefix
      when "link:"
        "#{prefix}++#{scheme}#{body}++#{bracket}#{suffix}"
      when "<"
        # A bare URL in <...> swallows the closing ">" into its body: move
        # it back outside the passthrough. Only re-emit ">" when one was
        # actually removed; otherwise (e.g. <http://a.b[x]>) the ">" is still
        # in the unmatched text and appending one would duplicate it.
        closer = body.end_with?(">") ? ">" : ""
        "#{prefix}link:++#{scheme}#{body.chomp('>')}++#{bracket}#{suffix}#{closer}"
      else
        "#{prefix}link:++#{scheme}#{body}++#{bracket}#{suffix}"
      end
    end
  end

  # InlineLinkMacroRx = /\\?(?:link|(mailto)):(|[^:\s\[][^\s\[]*)\[(|#{CC_ALL}*?[^\\])\]/m
  InlineLinkMacroRx1 = <<~REGEX.freeze
    (\\\\?\\b(?<!-) # optional backslash, no hyphen, word boundary
      (?:link|mailto):) # link: or mailto:
    (?!\\+) # no link:+ passthrough
    (|[^:\\s\\[][^\\s\\[]*) # link: ... up to [
    (\\[(|.*?[^\\\\])\\]) # [ ... ], no ]
  REGEX
  InlineLinkMacroRx = /#{InlineLinkMacroRx1}/x.freeze

  # Protects the target of link:...[...] and mailto:...[...] macros that
  # are not already using a + passthrough.
  #
  # @param text [String]
  # @return [String] text with macro targets wrapped in ++...++
  def inlinelinkmacro(text)
    # cheap pre-check: a macro needs both a name and a bracket
    has_macro = text.include?("link:") || text.include?("mailto:")
    return text unless text.include?("[") && has_macro

    pass_inline_split(text) do |segment|
      segment.gsub(InlineLinkMacroRx) do
        m = Regexp.last_match
        "#{m[1]}++#{m[2]}++#{m[3]}"
      end
    end.join
  end
end
end
end
4 changes: 2 additions & 2 deletions lib/metanorma/standoc/macros_inline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def process(parent, target, attr)
text = attr["text"]
text = "((#{text}))" unless /^\(\(.+\)\)$/.match?(text)
out = parent.sub_macros(text)
out.sub(/<index>/, "<index to='#{target}'>")
out.sub("<index>", "<index to='#{target}'>")
end
end

Expand Down Expand Up @@ -148,7 +148,7 @@ def process(parent, _target, attrs)
content = CSV.parse_line(out).map do |x|
x.sub!(/^(["'])(.+)\1/, "\\2")
m = /^(.*?)(:\d+)?$/.match(x)
%{<toc-xpath depth='#{m[2]&.sub(/:/, '') || 1}'>#{m[1]}</toc-xpath>}
%{<toc-xpath depth='#{m[2]&.sub(':', '') || 1}'>#{m[1]}</toc-xpath>}
end.join
"<toc>#{content}</toc>"
end
Expand Down
77 changes: 77 additions & 0 deletions spec/metanorma/cleanup_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,83 @@
.to be_equivalent_to(xmlpp(output))
end
# Spec for the "prevent character autosubstitution in URIs" change (#808):
# URIs written as bare links, <...>-wrapped links, link: macros, ++...++
# passthroughs, ++++ passthrough blocks and pass: macros are converted,
# and the sequences they contain (such as "..." or "&") must appear in the
# expected output unaltered, apart from XML escaping of "&".
it "do not apply substitutions to links" do
# AsciiDoc source: one paragraph per link form under a single clause
input = <<~INPUT
#{ASCIIDOC_BLANK_HDR}
== Clause
http://www.example.com/...abc

<http://www.example.com/...abc>

a http://www.example.com/...abc

http://www.example.com/...abc[]

http://www.example.com/...abc[x]

++http://www.example.com++

https://isotc.iso.org/livelink/livelink/fetch/-15620806/15620808/15623592/15768654/TMB_resolutions_-_2012_%28Resolution_1-148%29.pdf?nodeid=15768229&vernum=-2

https://isotc.iso.org/livelink/livelink/fetch/-15620806/15620808/15623592/15768654/TMB_resolutions_-_2012_%28Resolution_1-148%29.pdf?nodeid=15768229&vernum=-2[TMB Resolution 8/2012]

link:http://www...com[]
<link:http://www...com[]>

a link:http://www...com[]
link:++http://www...com++[]

++++
<a xmlns="http://www.example.com"/>
++++

pass:q[http://www.example.com]
And pass:[http://www.example.com] and pass:a,q[http://www.example.com]

INPUT
# Expected XML for the clause: link targets carry the URIs verbatim
output = <<~OUTPUT
<clause id="_" inline-header="false" obligation="normative">
<title>Clause</title>
<p id="_">
<link target="http://www.example.com/...abc"/>
</p>
<p id="_">&lt;<link target="http://www.example.com/...abc"/>&gt;</p>
<p id="_">a <link target="http://www.example.com/...abc"/></p>
<p id="_">
<link target="http://www.example.com/...abc"/>
</p>
<p id="_">
<link target="http://www.example.com/...abc">x</link>
</p>
<p id="_">http://www.example.com</p>
<p id="_">
<link target="https://isotc.iso.org/livelink/livelink/fetch/-15620806/15620808/15623592/15768654/TMB_resolutions_-_2012_%28Resolution_1-148%29.pdf?nodeid=15768229&amp;vernum=-2"/>
</p>
<p id="_">
<link target="https://isotc.iso.org/livelink/livelink/fetch/-15620806/15620808/15623592/15768654/TMB_resolutions_-_2012_%28Resolution_1-148%29.pdf?nodeid=15768229&amp;vernum=-2">TMB Resolution 8/2012</link>
</p>
<p id="_">
<link target="http://www...com"/>
</p>
<p id="_">&lt;<link target="http://www...com"/>&gt;</p>
<p id="_">a <link target="http://www...com"/></p>
<p id="_">
<link target="http://www...com"/>
</p>
<a xmlns="http://www.example.com"/>
<p id="_">http://www.example.com
And http://www.example.com and http://www.example.com</p>
</clause>
OUTPUT
# Convert, then compare only the <clause> element with GUIDs stripped
ret = Nokogiri::XML(Asciidoctor.convert(input, *OPTIONS))
expect(xmlpp(strip_guid(ret.at("//xmlns:clause").to_xml)))
.to be_equivalent_to(xmlpp(output))
end
private
def mock_mathml_italicise(string)
Expand Down

0 comments on commit ee7785f

Please sign in to comment.