Skip to content

Commit

Permalink
Merge pull request #907 from metanorma/fix/linebreak-sanitise
Browse files Browse the repository at this point in the history
remove linebreaks in Metanorma postprocessing not on reading blocks: …
  • Loading branch information
opoudjis authored Aug 6, 2024
2 parents 892b72a + 3c640fd commit 1489e35
Show file tree
Hide file tree
Showing 19 changed files with 198 additions and 114 deletions.
2 changes: 2 additions & 0 deletions Gemfile.devel
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gem "metanorma-utils", git: "https://github.com/metanorma/metanorma-utils", branch: "fix/linebreak-sanitise"
gem "mn-requirements", git: "https://github.com/metanorma/mn-requirements", branch: "fix/linebreak-sanitise"
12 changes: 6 additions & 6 deletions lib/metanorma/standoc/anchor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def inline_anchor(node)
def inline_anchor_ref(node)
noko do |xml|
xml.bookmark nil, **attr_code(id: node.id)
end.join
end
end

def inline_anchor_xref(node)
Expand All @@ -26,7 +26,7 @@ def inline_anchor_xref(node)
xml.xref **attr_code(attrs) do |x|
x << c
end
end.join
end
end

def inline_anchor_xref_attrs(node)
Expand Down Expand Up @@ -83,7 +83,7 @@ def inline_anchor_link(node)
xml.link **attr_code(attributes) do |l|
l << contents
end
end.join
end
end

def inline_anchor_link_attrs(node)
Expand All @@ -103,7 +103,7 @@ def inline_anchor_bibref(node)
xml.ref **attr_code(id: node.target || node.id) do |r|
r << eref_contents
end
end.join
end
end

def inline_anchor_bibref_contents(node)
Expand All @@ -114,7 +114,7 @@ def inline_anchor_bibref_contents(node)
def inline_callout(node)
noko do |xml|
xml.callout node.text
end.join
end
end

def inline_footnote(node)
Expand All @@ -124,7 +124,7 @@ def inline_footnote(node)
xml.fn reference: @fn_number do |fn|
fn.p { |p| p << node.text }
end
end.join
end
end
end
end
Expand Down
10 changes: 5 additions & 5 deletions lib/metanorma/standoc/blocks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def term_example(node)
)) do |ex|
wrap_in_para(node, ex)
end
end.join("")
end
end

def example(node)
Expand Down Expand Up @@ -128,7 +128,7 @@ def pseudocode_example(node)
figure_title(node, ex)
wrap_in_para(node, ex)
end
end.join("")
end
end

def example_attrs(node)
Expand All @@ -141,7 +141,7 @@ def example_proper(node)
node.title.nil? or ex.name { |name| name << node.title }
wrap_in_para(node, ex)
end
end.join("")
end
end

def para_attrs(node)
Expand All @@ -158,7 +158,7 @@ def paragraph(node)
xml.p **para_attrs(node) do |xml_t|
xml_t << node.content
end
end.join("")
end
end

def quote_attrs(node)
Expand All @@ -183,7 +183,7 @@ def quote(node)
quote_attribution(node, q)
wrap_in_para(node, q)
end
end.join("")
end
end

def listing_attrs(node)
Expand Down
4 changes: 2 additions & 2 deletions lib/metanorma/standoc/blocks_image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def svgmap_example(node)
figure_title(node, ex)
ex << node.content
end
end.join("\n")
end
end

def figure_example(node)
Expand All @@ -26,7 +26,7 @@ def figure_example(node)
node.title.nil? or ex.name { |name| name << node.title }
wrap_in_para(node, ex)
end
end.join("")
end
end

def figure_title(node, out)
Expand Down
10 changes: 5 additions & 5 deletions lib/metanorma/standoc/blocks_notes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def sidebar(node)
xml.review **sidebar_attrs(node) do |r|
wrap_in_para(node, r)
end
end.join("")
end
end

def todo_attrs(node)
Expand All @@ -49,15 +49,15 @@ def todo(node)
xml.review **todo_attrs(node) do |r|
wrap_in_para(node, r)
end
end.join("")
end
end

def termnote(node)
noko do |xml|
xml.termnote **termnote_attrs(node) do |ex|
wrap_in_para(node, ex)
end
end.join("")
end
end

def note(node)
Expand All @@ -67,7 +67,7 @@ def note(node)
xml.note **note_attrs(node) do |c|
wrap_in_para(node, c)
end
end.join("")
end
end

def boilerplate_note(node)
Expand Down Expand Up @@ -101,7 +101,7 @@ def admonition(node)
node.title.nil? or a.name { |name| name << node.title }
wrap_in_para(node, a)
end
end.join("")
end
end

def admonition_alternatives(node)
Expand Down
3 changes: 2 additions & 1 deletion lib/metanorma/standoc/cleanup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def cleanup(xmldoc)
boilerplate_cleanup(xmldoc)
toc_cleanup(xmldoc)
smartquotes_cleanup(xmldoc)
linebreak_cleanup(xmldoc)
variant_cleanup(xmldoc)
para_cleanup(xmldoc)
empty_element_cleanup(xmldoc)
Expand Down Expand Up @@ -137,7 +138,7 @@ def empty_element_cleanup(xmldoc)
end

def element_name_cleanup(xmldoc)
xmldoc.traverse { |n| n.name = n.name.gsub("_", "-") }
xmldoc.traverse { |n| n.name = n.name.tr("_", "-") }
end

# allows us to deal with doc relation localities,
Expand Down
2 changes: 1 addition & 1 deletion lib/metanorma/standoc/cleanup_symbols.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module Cleanup
def symbol_key(sym)
@c.decode(asciimath_key(sym).text)
.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub("`", "")
.gsub(/[[:punct:]]|[_^]/, ":\\0").delete("`")
.gsub(/[0-9]+/, "þ\\0")
.tr("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
"ABCFEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
Expand Down
6 changes: 3 additions & 3 deletions lib/metanorma/standoc/cleanup_terms_designations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def term_dl_to_designation_metadata(prev, dlist)
%w(absent geographic-area).each do |a|
dl_to_attrs(prev, dlist, a)
end
%w(field-of-application usage-info).reverse.each do |a|
%w(field-of-application usage-info).reverse_each do |a|
dl_to_elems(prev.at("./expression"), prev, dlist, a)
end
end
Expand Down Expand Up @@ -78,7 +78,7 @@ def term_dl_to_expression_root_metadata(prev, dlist)
end

def term_dl_to_expression_name_metadata(prev, dlist)
%w(abbreviation-type pronunciation).reverse.each do |a|
%w(abbreviation-type pronunciation).reverse_each do |a|
dl_to_elems(prev.at("./expression/name"), prev, dlist, a)
end
g = dlist.at("./dt[text()='grammar']/following::dd//dl") and
Expand All @@ -89,7 +89,7 @@ def term_dl_to_expression_grammar(prev, dlist)
prev.at(".//expression") or return
prev.at(".//expression") << "<grammar><sentinel/></grammar>"
%w(gender number isPreposition isParticiple isAdjective isAdverb isNoun
grammar-value).reverse.each do |a|
grammar-value).reverse_each do |a|
dl_to_elems(prev.at(".//expression/grammar/*"), prev.elements.last,
dlist, a)
end
Expand Down
69 changes: 64 additions & 5 deletions lib/metanorma/standoc/cleanup_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,54 @@ def textcleanup(result)
text
end

# Report whether any ancestor of +elem+ carries one of the given element names.
#
# The ancestor chain is read off the node's "/"-separated path: positional
# predicates such as "[2]" are stripped, then the leading empty segment and
# the node's own (final) segment are dropped. What remains is compared
# against +ancestors+.
#
# @param elem [#path] node responding to +path+ with a "/"-separated path
# @param ancestors [Array<String>] element names to look for
# @return [Boolean] true if at least one ancestor name occurs in +ancestors+
def ancestor_include?(elem, ancestors)
  # A bare "/" path splits to [], and [][1..-2] is nil rather than [];
  # to_a turns that into "no ancestors" instead of raising NoMethodError.
  path = elem.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2].to_a
  !path.intersection(ancestors).empty?
end

# Walk every node of +xmldoc+ and rewrite text nodes that contain a newline,
# provided they sit under a STRIP_LINEBREAK_ELEMENTS element and not under a
# PRESERVE_LINEBREAK_ELEMENTS element. Each line is right-stripped, passed
# through Metanorma::Utils.line_sanitise, and the result is joined back into
# a single replacement string.
#
# NOTE(review): a second method named linebreak_cleanup is defined further
# down in this file; in Ruby the later definition replaces the earlier one,
# so this version appears to be dead code -- confirm and remove.
def linebreak_cleanup(xmldoc)
  xmldoc.traverse do |node|
    next unless node.text? && node.text.include?("\n")
    next if ancestor_include?(node, PRESERVE_LINEBREAK_ELEMENTS)
    next unless ancestor_include?(node, STRIP_LINEBREAK_ELEMENTS)

    stripped = node.text.lines.map(&:rstrip)
    node.replace(Metanorma::Utils.line_sanitise(stripped).join)
  end
end

# Strip line breaks from the text of block-level elements.
#
# Process example/p, example/sourcecode, not example on its own:
# this is about stripping lines for blocks containing inline elems & text.
# A STRIP_LINEBREAK_ELEMENTS element is therefore handled only when it has
# no STRIP_LINEBREAK_ELEMENTS descendant of its own (innermost blocks only).
#
# @param xmldoc [#xpath] document (or node) to clean up in place
def linebreak_cleanup(xmldoc)
  # Both queries depend only on the constant, so build them once rather than
  # once per matched block.
  all_blocks = STRIP_LINEBREAK_ELEMENTS.map { |e| "//#{e}" }.join(" | ")
  nested_blocks = STRIP_LINEBREAK_ELEMENTS.map { |e| ".//#{e}" }.join(" | ")
  xmldoc.xpath(all_blocks).each do |b|
    b.xpath(nested_blocks).empty? or next
    linebreak_cleanup_block(gather_text_for_linebreak_cleanup(b))
  end
end

# Rewrite the text nodes described by +block+, one token at a time.
#
# Each token is a hash with :elem (the node to replace), :text, :skip and,
# on the final token only, :last. Tokens flagged :skip are left alone. For
# every other token the right-stripped lines are sent through
# Metanorma::Utils.line_sanitise; unless the token is the last one, the first
# line of the following token is appended beforehand as context and popped
# off again afterwards, so it never ends up in the replacement text.
def linebreak_cleanup_block(block)
  block.each_with_index do |token, idx|
    next if token[:skip]

    final = token[:last]
    lines = token[:text].lines.map(&:rstrip)
    # next token context
    lines << block[idx + 1][:text].lines.first unless final
    sanitised = Metanorma::Utils.line_sanitise(lines)
    sanitised.pop unless final
    token[:elem].replace(sanitised.join)
  end
end

# Collect the descendant text nodes of +block+ as an array of token hashes.
#
# Each hash holds :elem (the text node), :text (its content) and :skip, which
# is true when the node has a PRESERVE_LINEBREAK_ELEMENTS ancestor or when
# its text contains no newline. The final token is additionally flagged with
# last: true. An empty array is returned when there are no text nodes.
def gather_text_for_linebreak_cleanup(block)
  tokens = block.xpath(".//text()").map do |node|
    content = node.text
    preserved = ancestor_include?(node, PRESERVE_LINEBREAK_ELEMENTS)
    { elem: node, text: content,
      skip: preserved || !content.include?("\n") }
  end
  tokens.last[:last] = true unless tokens.empty?
  tokens
end

def smartquotes_cleanup(xmldoc)
xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
if @smartquotes then smartquotes_cleanup1(xmldoc)
Expand All @@ -37,10 +85,20 @@ def uninterrupt_quotes_around_xml(xmldoc)
%w(pre tt sourcecode stem asciimath figure bibdata passthrough
identifier metanorma-extension).freeze

# Element names whose text keeps its line breaks: text nodes with any of
# these as an ancestor are skipped by the linebreak cleanup methods.
PRESERVE_LINEBREAK_ELEMENTS =
%w(pre sourcecode passthrough metanorma-extension).freeze

# Element names selected by linebreak_cleanup as candidates for having the
# line breaks in their text sanitised.
STRIP_LINEBREAK_ELEMENTS =
%w(title name variant-title figure example review admonition
note li th td dt dd p quote label annotation
preferred admitted related deprecates field-of-application
usage-info expression pronunciation grammar-value domain
definition termnote termexample modification description
newcontent floating-title).freeze

def uninterrupt_quotes_around_xml_skip(elem)
!(/\A['"]/.match?(elem.text) &&
elem.previous.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2]
.intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
!ancestor_include?(elem.previous, IGNORE_QUOTES_ELEMENTS) &&
((elem.previous.text.strip.empty? &&
!empty_tag_with_text_content?(elem.previous)) ||
ignoretext?(elem.previous)))
Expand Down Expand Up @@ -69,7 +127,7 @@ def block?(elem)
abstract preferred admitted related deprecates field-of-application
usage-info expression pronunciation grammar-value domain
definition termnote termexample modification description
newcontent floating-title tab).include? elem.name
newcontent floating-title tab review admonition annotation).include? elem.name
end

def empty_tag_with_text_content?(elem)
Expand All @@ -83,8 +141,9 @@ def dumb2smart_quotes(xmldoc)
empty_tag_with_text_content?(x) and prev = "dummy"
x.text? or next

ancestors = x.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2]
ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? or next
# ancestors = x.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2]
# ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? or next
ancestor_include?(x, IGNORE_QUOTES_ELEMENTS) and next
dumb2smart_quotes1(x, prev)
prev = x.text
end
Expand Down
12 changes: 6 additions & 6 deletions lib/metanorma/standoc/inline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@ def inline_break(node)
noko do |xml|
xml << node.text
xml.br
end.join
end
end

def page_break(node)
attrs = {}
node.option?("landscape") and attrs[:orientation] = "landscape"
node.option?("portrait") and attrs[:orientation] = "portrait"
noko { |xml| xml.pagebreak **attr_code(attrs) }.join
noko { |xml| xml.pagebreak **attr_code(attrs) }
end

def thematic_break(_node)
# noko(&:hr).join # Do not do this, noko blows up
noko { |xml| xml.hr }.join # rubocop:disable Style/SymbolProc
noko { |xml| xml.hr } # rubocop:disable Style/SymbolProc
end

def latex_parse1(text, block)
Expand Down Expand Up @@ -102,7 +102,7 @@ def inline_quoted(node)
xml << node.text
end
end
end.join
end
end

def hash2styles(role)
Expand Down Expand Up @@ -146,15 +146,15 @@ def image_attributes1(node, uri, type)
def inline_image(node)
noko do |xml|
xml.image **image_attributes(node)
end.join
end
end

def inline_indexterm(node)
noko do |xml|
node.type == :visible and xml << node.text
terms = (node.attr("terms") || [node.text]).map { |x| xml_encode(x) }
inline_indexterm1(xml, terms)
end.join
end
end

def inline_indexterm1(xml, terms)
Expand Down
Loading

0 comments on commit 1489e35

Please sign in to comment.