Skip to content

Commit

Permalink
track passthrough lines in preprocessing: metanorma/metanorma-utils#32
Browse files Browse the repository at this point in the history
  • Loading branch information
opoudjis committed Jul 11, 2024
1 parent f7280ee commit e04d622
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 56 deletions.
59 changes: 15 additions & 44 deletions lib/metanorma/standoc/macros.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,19 @@ def process(parent, reader, attrs)
class NamedEscapePreprocessor < Asciidoctor::Extensions::Preprocessor
def process(document, reader)
c = HTMLEntities.new
p = Metanorma::Utils::LineStatus.new
lines = reader.lines.map do |l|
l.split(/(&[A-Za-z][^&;]*;)/).map do |s|
/^&[A-Za-z]/.match?(s) ? c.encode(c.decode(s), :hexadecimal) : s
end.join
p.process(l)
p.pass ? l : convert(l, c)
end
::Asciidoctor::PreprocessorReader.new document, lines
end

# Rewrites the entity-like tokens of one line via the supplied coder.
#
# The line is cut around every token shaped like "&name;". Each resulting
# piece that begins with "&" followed by an ASCII letter is passed through
# esc.decode and then esc.encode(..., :hexadecimal); every other piece is
# kept unchanged. The pieces are then glued back together.
#
# line - the String to rewrite
# esc  - an object responding to decode(str) and encode(str, :hexadecimal)
#
# Returns the rewritten String.
def convert(line, esc)
  pieces = line.split(/(&[A-Za-z][^&;]*;)/)
  rewritten = pieces.map do |piece|
    next piece unless /^&[A-Za-z]/.match?(piece)

    esc.encode(esc.decode(piece), :hexadecimal)
  end
  rewritten.join
end
end

class ColumnBreakBlockMacro < Asciidoctor::Extensions::BlockMacroProcessor
Expand All @@ -75,51 +81,16 @@ def process(parent, _reader, _attrs)
# Not using TreeProcessor because that is still too close to
# inline expressions being processed on access (e.g. titles)
class LinkProtectPreprocessor < Asciidoctor::Extensions::Preprocessor
# Builds the starting state Hash for the line-by-line passthrough scan.
#
# Returns a Hash with the keys :pass, :is_delim, :pass_delim and :delimln.
def init
  {
    # process as passthrough from the start, until the end of the
    # document header is hit
    pass: true,
    # whether the current line is a no-substitution block delimiter
    is_delim: false,
    # whether the current line is a passthrough delimiter
    pass_delim: false,
    # delimiter line of the current block(s); the initial empty string
    # looks for the end of the document header
    delimln: "",
  }
end

def process(document, reader)
p = init
p = Metanorma::Utils::LineStatus.new
lines = reader.lines.map do |t|
p = pass_status(p, t.rstrip)
!p[:pass] && t.include?(":") and t = inlinelinkmacro(inlinelink(t))
p.process(t)
!p.pass && t.include?(":") and t = inlinelinkmacro(inlinelink(t))
t
end
::Asciidoctor::PreprocessorReader.new document, lines
end

# Advances the passthrough-tracking state by one source line.
#
# status - Hash of scan state; read and written under the keys :pass,
#          :is_delim, :pass_delim, :delimln, :prev_line and
#          :midline_docattr. It is mutated in place.
# text   - the current line as a String
#
# Returns the same (mutated) status Hash. status[:pass] tells the caller
# whether the line is to be treated as passthrough.
def pass_status(status, text)
  # A "++++" line flips passthrough, but only while no delimiter line is
  # being tracked.
  text == "++++" && !status[:delimln] and status[:pass] = !status[:pass]
  # A run of attribute entries ends at the first line that is not one.
  status[:midline_docattr] && !/^:[^ :]+: /.match?(text) and
    status[:midline_docattr] = false
  if (status[:is_delim] && /^(-+|\*+|=+|_+)$/.match?(text)) ||
      (!status[:is_delim] && !status[:delimln] && /^-----*$|^\.\.\.\.\.*$/.match?(text))
    # Opening delimiter: either a delimiter line directly after a
    # [source|listing|literal|pass] line, or a bare run of four or more
    # hyphens or dots while no block is open. Remember it so that the
    # matching closing line can be recognised.
    status[:delimln] = text
    status[:pass] = true
  elsif status[:pass_delim]
    status[:delimln] = "" # end of paragraph for paragraph with [pass]
  elsif status[:delimln] && text == status[:delimln]
    # Closing delimiter (or the blank line standing in for one).
    status[:pass] = false
    status[:delimln] = nil
  elsif /^:[^ :]+: /.match?(text) &&
      # :prev_line is nil until the first line has been processed; to_s
      # treats that the same as a preceding blank line instead of raising
      # NoMethodError on nil.
      (status[:prev_line].to_s.empty? || status[:midline_docattr])
    status[:pass] = true
    status[:midline_docattr] = true
  end
  # Flags describing this line, consulted when the next line is processed.
  status[:is_delim] = /^\[(source|listing|literal|pass)\b/.match?(text)
  status[:pass_delim] = /^\[(pass)\b/.match?(text)
  status[:prev_line] = text.strip
  status
end

# Inline macro names joined with "|" into a single frozen String, i.e. an
# alternation fragment ("pass|pass-format|identifier|std-link|stem").
# NOTE(review): presumably interpolated into the passthrough inline macro
# regex defined below — confirm against PASS_INLINE_MACRO_STR.
PASS_INLINE_MACROS = %w(pass pass-format identifier std-link stem)
.join("|").freeze

Expand All @@ -133,7 +104,7 @@ def pass_status(status, text)
\\[.*?(?<!\\\\)\\] # [ ... ] not preceded by \\
)
REGEX
PASS_INLINE_MACRO_RX = /#{PASS_INLINE_MACRO_STR}/xo.freeze
PASS_INLINE_MACRO_RX = /#{PASS_INLINE_MACRO_STR}/xo

def pass_inline_split(text)
text.split(PASS_INLINE_MACRO_RX).each.map do |x|
Expand All @@ -143,7 +114,7 @@ def pass_inline_split(text)

# InlineLinkRx = %r((^|link:|#{CG_BLANK}|&lt;|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m
#
InlineLinkRx = %r((^|(?<![-\\])\blink:(?!\+)|\p{Blank}|&lt;|[<>\(\)\[\];"'])((?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)(?:(\[(|.*?[^\\])\])|([^\s\[\]<]*([^\s,.?!\[\]<\)])))))m.freeze
InlineLinkRx = %r((^|(?<![-\\])\blink:(?!\+)|\p{Blank}|&lt;|[<>\(\)\[\];"'])((?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)(?:(\[(|.*?[^\\])\])|([^\s\[\]<]*([^\s,.?!\[\]<\)])))))m

def inlinelink(text)
text.include?("://") or return text
Expand Down Expand Up @@ -173,7 +144,7 @@ def inlinelink_escape(text)
(|[^:\\s\\[][^\\s\\[]*) # link: ... up to [
(\\[(|.*?[^\\\\])\\]) # [ ... ], no ]
REGEX
InlineLinkMacroRx = /#{InlineLinkMacroRx1}/x.freeze
InlineLinkMacroRx = /#{InlineLinkMacroRx1}/x

def inlinelinkmacro(text)
(text.include?("[") &&
Expand Down
26 changes: 14 additions & 12 deletions lib/metanorma/standoc/macros_embed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,19 @@ class EmbedIncludeProcessor < Asciidoctor::Extensions::Preprocessor
def process(doc, reader)
reader.eof? and return reader
r = ::Asciidoctor::PreprocessorNoIfdefsReader.new doc, reader.lines
p = Metanorma::Utils::LineStatus.new
lines = r.readlines
headings = lines.grep(/^== /).map(&:strip)
ret = lines.each_with_object(embed_acc(doc, r)) do |line, m|
process_line(line, m, headings)
process_line(line, m, headings, p)
end
return_to_document(doc, ret)
end

def embed_acc(doc, reader)
{ lines: [], hdr: [], id: [],
orig: doc, doc: doc, file: nil, path: nil,
reader: reader, prev: nil }
orig: doc, doc:, file: nil, path: nil,
reader:, prev: nil }
end

# presupposes single embed
Expand Down Expand Up @@ -93,13 +94,14 @@ def flatten_embeds(emb)

def update_embeds(lines, acc, emb)
lines.empty? or
acc << { file: emb[:file], path: emb[:path], lines: lines }
acc << { file: emb[:file], path: emb[:path], lines: }
[[], acc]
end

def process_line(line, acc, headings)
if /^embed::/.match?(line)
e = embed(line, acc, headings)
def process_line(line, acc, headings, status)
status.process(line)
if !status.pass && /^embed::/.match?(line)
e = embed(line, acc, headings, status)
acc = process_embed(acc, e, acc[:prev])
else
acc[:lines] << line
Expand Down Expand Up @@ -148,7 +150,7 @@ def read(inc_path)
end
end

def embed(line, acc, headings)
def embed(line, acc, headings, status)
fname, inc_path = filename(line, acc)
lines = filter_sections(read(inc_path), headings)
n = Asciidoctor::Document
Expand All @@ -158,12 +160,12 @@ def embed(line, acc, headings)
.merge(file: fname, path: inc_path, orig: acc[:orig])
ret[:hdr] or
raise "Embedding an incomplete document with no header: #{ret[:path]}"
embed_recurse(ret, n, r, headings)
embed_recurse(ret, n, r, headings, status)
end

def embed_recurse(ret, doc, reader, headings)
def embed_recurse(ret, doc, reader, headings, status)
ret1 = ret[:lines].each_with_object(embed_acc(doc, reader)) do |line, m|
process_line(line, m, headings)
process_line(line, m, headings, status)
end
ret.merge(
{ lines: ret1[:lines], id: ret[:id] + ret1[:id],
Expand All @@ -172,7 +174,7 @@ def embed_recurse(ret, doc, reader, headings)
end

def strip_header(lines)
return { lines: lines, hdr: nil } unless !lines.empty? &&
return { lines:, hdr: nil } unless !lines.empty? &&
lines.first.start_with?("= ")

skip = true
Expand Down
90 changes: 90 additions & 0 deletions spec/vcr_cassettes/iev_03-01-02.yml

Large diffs are not rendered by default.

0 comments on commit e04d622

Please sign in to comment.