Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

track passthrough lines in preprocessing: https://github.com/metanorm… #893

Merged
merged 1 commit into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 15 additions & 44 deletions lib/metanorma/standoc/macros.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,19 @@
class NamedEscapePreprocessor < Asciidoctor::Extensions::Preprocessor
def process(document, reader)
c = HTMLEntities.new
p = Metanorma::Utils::LineStatus.new
lines = reader.lines.map do |l|
l.split(/(&[A-Za-z][^&;]*;)/).map do |s|
/^&[A-Za-z]/.match?(s) ? c.encode(c.decode(s), :hexadecimal) : s
end.join
p.process(l)
p.pass ? l : convert(l, c)
end
::Asciidoctor::PreprocessorReader.new document, lines
end

# Rewrite each named character reference (e.g. "&amp;") in +line+ by decoding
# it and re-encoding the result with the :hexadecimal instruction.
#
# Only complete references of the form "&name;" (a letter followed by
# letters/digits, terminated by ";") are handed to the coder. Text that merely
# begins with "&" plus a letter is left untouched, so that the coder is never
# given surrounding markup to escape.
#
# @param line [String] one line of source text
# @param esc [#decode, #encode] entity coder; the caller passes HTMLEntities.new
# @return [String] the line with named references replaced
def convert(line, esc)
  line.gsub(/&[A-Za-z][A-Za-z0-9]*;/) do |entity|
    esc.encode(esc.decode(entity), :hexadecimal)
  end
end
end

class ColumnBreakBlockMacro < Asciidoctor::Extensions::BlockMacroProcessor
Expand All @@ -75,51 +81,16 @@
# Not using TreeProcessor because that is still too close to
# inline expressions being processed on access (e.g. titles)
class LinkProtectPreprocessor < Asciidoctor::Extensions::Preprocessor
# Build the initial line-scanning state used while walking the document.
#
# @return [Hash] state hash with keys :pass, :is_delim, :pass_delim, :delimln
def init
  {
    # process as passthrough: stays true until the end of the doc header is hit
    pass: true,
    # current line is a no-substitution block delimiter
    is_delim: false,
    # current line is a passthrough delimiter
    pass_delim: false,
    # delimiter line of current block(s); the initial value looks for the
    # end of the doc header
    delimln: "",
  }
end

def process(document, reader)
p = init
p = Metanorma::Utils::LineStatus.new
lines = reader.lines.map do |t|
p = pass_status(p, t.rstrip)
!p[:pass] && t.include?(":") and t = inlinelinkmacro(inlinelink(t))
p.process(t)
!p.pass && t.include?(":") and t = inlinelinkmacro(inlinelink(t))
t
end
::Asciidoctor::PreprocessorReader.new document, lines
end

# Update the line-scanning state for one line of the document.
#
# @param status [Hash] state from +init+ or a previous call; mutated in place.
#   Keys read and written here: :pass, :is_delim, :pass_delim, :delimln,
#   :midline_docattr, :prev_line
# @param text [String] the current line (the caller passes it right-stripped)
# @return [Hash] the same +status+ hash, with :pass telling whether the
#   current line is to be treated as passthrough
def pass_status(status, text)
  # a "++++" line toggles passthrough when no delimiter is being tracked
  text == "++++" && !status[:delimln] and status[:pass] = !status[:pass]
  # a run of document attribute lines ends at the first non-attribute line
  status[:midline_docattr] && !/^:[^ :]+: /.match?(text) and
    status[:midline_docattr] = false
  if (status[:is_delim] && /^(-+|\*+|=+|_+)$/.match?(text)) ||
      (!status[:is_delim] && !status[:delimln] && /^-----*$|^\.\.\.\.\.*$/.match?(text))
    # opening delimiter: remember it and pass through until it recurs
    status[:delimln] = text
    status[:pass] = true
  elsif status[:pass_delim]
    status[:delimln] = "" # end of paragraph for paragraph with [pass]
  elsif status[:delimln] && text == status[:delimln]
    # closing delimiter (or the empty line being waited for)
    status[:pass] = false
    status[:delimln] = nil
  elsif /^:[^ :]+: /.match?(text) &&
      # :prev_line is unset on the very first line scanned; treat that as
      # "previous line empty" instead of calling #empty? on nil
      (status[:prev_line].to_s.empty? || status[:midline_docattr])
    status[:pass] = true
    status[:midline_docattr] = true
  end
  # flags describing this line, consulted when the next line is processed
  status[:is_delim] = /^\[(source|listing|literal|pass)\b/.match?(text)
  status[:pass_delim] = /^\[(pass)\b/.match?(text)
  status[:prev_line] = text.strip
  status
end

PASS_INLINE_MACROS = %w(pass pass-format identifier std-link stem)
.join("|").freeze

Expand All @@ -133,7 +104,7 @@
\\[.*?(?<!\\\\)\\] # [ ... ] not preceded by \\
)
REGEX
PASS_INLINE_MACRO_RX = /#{PASS_INLINE_MACRO_STR}/xo.freeze
PASS_INLINE_MACRO_RX = /#{PASS_INLINE_MACRO_STR}/xo

def pass_inline_split(text)
text.split(PASS_INLINE_MACRO_RX).each.map do |x|
Expand All @@ -143,7 +114,7 @@

# InlineLinkRx = %r((^|link:|#{CG_BLANK}|&lt;|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m
#
InlineLinkRx = %r((^|(?<![-\\])\blink:(?!\+)|\p{Blank}|&lt;|[<>\(\)\[\];"'])((?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)(?:(\[(|.*?[^\\])\])|([^\s\[\]<]*([^\s,.?!\[\]<\)])))))m.freeze
InlineLinkRx = %r((^|(?<![-\\])\blink:(?!\+)|\p{Blank}|&lt;|[<>\(\)\[\];"'])((?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)(?:(\[(|.*?[^\\])\])|([^\s\[\]<]*([^\s,.?!\[\]<\)])))))m

def inlinelink(text)
text.include?("://") or return text
Expand All @@ -154,15 +125,15 @@
end

def inlinelink_escape(text)
text.gsub(InlineLinkRx) do
body, suffix = $4.nil? ? [$3 + $6, "[]"] : [$3, ""]
p = $1 and s = $2 and b = $4
if p == "link:" then "#{p}++#{s}#{body}++#{b}#{suffix}"
elsif p == "<"
"#{p}link:++#{s}#{body.sub(/>$/, '')}++#{b}#{suffix}>"
else "#{p}link:++#{s}#{body}++#{b}#{suffix}"
end
end

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data High

This regular expression, which depends on a library input, may run slow on strings starting with 'irc://!' and with many repetitions of '!!'.
end

# InlineLinkMacroRx = /\\?(?:link|(mailto)):(|[^:\s\[][^\s\[]*)\[(|#{CC_ALL}*?[^\\])\]/m
Expand All @@ -173,7 +144,7 @@
(|[^:\\s\\[][^\\s\\[]*) # link: ... up to [
(\\[(|.*?[^\\\\])\\]) # [ ... ], no ]
REGEX
InlineLinkMacroRx = /#{InlineLinkMacroRx1}/x.freeze
InlineLinkMacroRx = /#{InlineLinkMacroRx1}/x

def inlinelinkmacro(text)
(text.include?("[") &&
Expand Down
26 changes: 14 additions & 12 deletions lib/metanorma/standoc/macros_embed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,19 @@ class EmbedIncludeProcessor < Asciidoctor::Extensions::Preprocessor
def process(doc, reader)
reader.eof? and return reader
r = ::Asciidoctor::PreprocessorNoIfdefsReader.new doc, reader.lines
p = Metanorma::Utils::LineStatus.new
lines = r.readlines
headings = lines.grep(/^== /).map(&:strip)
ret = lines.each_with_object(embed_acc(doc, r)) do |line, m|
process_line(line, m, headings)
process_line(line, m, headings, p)
end
return_to_document(doc, ret)
end

def embed_acc(doc, reader)
{ lines: [], hdr: [], id: [],
orig: doc, doc: doc, file: nil, path: nil,
reader: reader, prev: nil }
orig: doc, doc:, file: nil, path: nil,
reader:, prev: nil }
end

# presupposes single embed
Expand Down Expand Up @@ -93,13 +94,14 @@ def flatten_embeds(emb)

def update_embeds(lines, acc, emb)
lines.empty? or
acc << { file: emb[:file], path: emb[:path], lines: lines }
acc << { file: emb[:file], path: emb[:path], lines: }
[[], acc]
end

def process_line(line, acc, headings)
if /^embed::/.match?(line)
e = embed(line, acc, headings)
def process_line(line, acc, headings, status)
status.process(line)
if !status.pass && /^embed::/.match?(line)
e = embed(line, acc, headings, status)
acc = process_embed(acc, e, acc[:prev])
else
acc[:lines] << line
Expand Down Expand Up @@ -148,7 +150,7 @@ def read(inc_path)
end
end

def embed(line, acc, headings)
def embed(line, acc, headings, status)
fname, inc_path = filename(line, acc)
lines = filter_sections(read(inc_path), headings)
n = Asciidoctor::Document
Expand All @@ -158,12 +160,12 @@ def embed(line, acc, headings)
.merge(file: fname, path: inc_path, orig: acc[:orig])
ret[:hdr] or
raise "Embedding an incomplete document with no header: #{ret[:path]}"
embed_recurse(ret, n, r, headings)
embed_recurse(ret, n, r, headings, status)
end

def embed_recurse(ret, doc, reader, headings)
def embed_recurse(ret, doc, reader, headings, status)
ret1 = ret[:lines].each_with_object(embed_acc(doc, reader)) do |line, m|
process_line(line, m, headings)
process_line(line, m, headings, status)
end
ret.merge(
{ lines: ret1[:lines], id: ret[:id] + ret1[:id],
Expand All @@ -172,7 +174,7 @@ def embed_recurse(ret, doc, reader, headings)
end

def strip_header(lines)
return { lines: lines, hdr: nil } unless !lines.empty? &&
return { lines:, hdr: nil } unless !lines.empty? &&
lines.first.start_with?("= ")

skip = true
Expand Down
90 changes: 90 additions & 0 deletions spec/vcr_cassettes/iev_03-01-02.yml

Large diffs are not rendered by default.

Loading