diff --git a/src/main/resources/de/bottlecaps/convert/xq/bison/bison-to-w3c.xq b/src/main/resources/de/bottlecaps/convert/xq/bison/bison-to-w3c.xq index 959e25d..f4eff9a 100644 --- a/src/main/resources/de/bottlecaps/convert/xq/bison/bison-to-w3c.xq +++ b/src/main/resources/de/bottlecaps/convert/xq/bison/bison-to-w3c.xq @@ -37,23 +37,17 @@ declare function x:rewrite($alternatives as element(rhs)+) as element()* declare function x:bison-to-w3c($parse-tree as element(input)) as element(g:grammar) { - t:remove-right-recursion - ( - t:remove-left-recursion - ( - element g:grammar + element g:grammar + { + let $rules := $parse-tree//rules + for $id in distinct-values($rules/id/ID) + let $rule := $rules[id/ID = $id] + order by min($rule/index-of($rules, .)) + return + element g:production { - let $rules := $parse-tree//rules - for $id in distinct-values($rules/id/ID) - let $rule := $rules[id/ID = $id] - order by min($rule/index-of($rules, .)) - return - element g:production - { - attribute name {$id}, - x:rewrite($rule/rhs) - } + attribute name {$id}, + x:rewrite($rule/rhs) } - ) - ) + } }; diff --git a/src/main/resources/de/bottlecaps/convert/xq/gold/gold-to-w3c.xq b/src/main/resources/de/bottlecaps/convert/xq/gold/gold-to-w3c.xq index 45fc9c7..139ae60 100644 --- a/src/main/resources/de/bottlecaps/convert/xq/gold/gold-to-w3c.xq +++ b/src/main/resources/de/bottlecaps/convert/xq/gold/gold-to-w3c.xq @@ -676,19 +676,13 @@ declare function p:gold-to-w3c($parse-tree as element(Grammar)) as element(g:gra :) () return - t:remove-right-recursion + p:case-insensitive-refs ( - t:remove-left-recursion - ( - p:case-insensitive-refs - ( - - {$syntax-productions, $ignore-production[@whitespace-spec]} - {processing-instruction TOKENS{}} - {$tokens-productions, $ignore-production[empty(@whitespace-spec)], $eof-production} - {$preferences} - - ) - ) + + {$syntax-productions, $ignore-production[@whitespace-spec]} + {processing-instruction TOKENS{}} + {$tokens-productions, $ignore-production[empty(@whitespace-spec)], $eof-production} + {$preferences} + ) }; diff --git a/src/main/resources/de/bottlecaps/convert/xq/jison/jison-to-w3c.xq b/src/main/resources/de/bottlecaps/convert/xq/jison/jison-to-w3c.xq index 2834100..bb5b9c0 100644 --- a/src/main/resources/de/bottlecaps/convert/xq/jison/jison-to-w3c.xq +++ b/src/main/resources/de/bottlecaps/convert/xq/jison/jison-to-w3c.xq @@ -49,47 +49,41 @@ declare function x:tokenize($string as xs:string*) as xs:string* declare function x:jison-to-w3c($parse-tree as element(jison)) as element(g:grammar) { - t:remove-right-recursion - ( - t:remove-left-recursion - ( - element g:grammar - { - for $p in $parse-tree/spec/grammar/production_list/production - return element g:production {attribute name {$p/id/ID}, x:rewrite($p/handle_list/handle_action)}, + element g:grammar + { + for $p in $parse-tree/spec/grammar/production_list/production + return element g:production {attribute name {$p/id/ID}, x:rewrite($p/handle_list/handle_action)}, - for $p in $parse-tree/json/bnf-pair/rule-pair - let $id := $p/string + for $p in $parse-tree/json/bnf-pair/rule-pair + let $id := $p/string + return + element g:production + { + attribute name {substring($id, 2, string-length($id) - 2)}, + x:rewrite + ( + for $s in $p/alternative/string + let $symbols := x:tokenize(substring($s, 2, string-length($s) - 2)) return - element g:production - { - attribute name {substring($id, 2, string-length($id) - 2)}, - x:rewrite - ( - for $s in $p/alternative/string - let $symbols := x:tokenize(substring($s, 2, string-length($s) - 2)) - return - if (empty($symbols)) then - - else - element handle_action + if (empty($symbols)) then + + else + element handle_action + { + for $t in $symbols + return + element handle { - for $t in $symbols - return - element handle - { - element symbol - { - if (matches($t, "^\p{Lu}") or $parse-tree/json/bnf-pair/rule-pair/string = concat("""", $t, """")) then - element id {element ID {$t}} - else - element STRING {concat('"', $t, '"')} - } - } + element symbol + { + if (matches($t, "^\p{Lu}") or $parse-tree/json/bnf-pair/rule-pair/string = concat("""", $t, """")) then + element id {element ID {$t}} + else + element STRING {concat('"', $t, '"')} + } } - ) - } + } + ) } - ) - ) + } }; diff --git a/src/main/resources/de/bottlecaps/convert/xq/to-w3c.xq b/src/main/resources/de/bottlecaps/convert/xq/to-w3c.xq index 01ecac5..e843c16 100644 --- a/src/main/resources/de/bottlecaps/convert/xq/to-w3c.xq +++ b/src/main/resources/de/bottlecaps/convert/xq/to-w3c.xq @@ -231,1093 +231,6 @@ declare function x:split-string($todo as xs:string) as element()* x:split-string($todo, ()) }; -declare variable $x:enable := false(); -declare variable $x:direct-recursion-only := true(); - -(:~ - : Check whether we are facing an indirectly recursive g:ref node. - : - : @param $node the candidate node. - : @return true, if it is an indirectly recursive reference. - :) -declare function x:is-indirectly-recursive($production-name as xs:string, - $node as element()?, - $parser-productions as element(g:production)*, - $epsilon-nonterminals as xs:string*) as xs:boolean -{ - $node/self::g:ref[empty(@context)] - and ($node/parent::g:choice or x:is-epsilon-sequence($node/preceding-sibling::*, $epsilon-nonterminals)) - and $node/@name != $production-name - and $parser-productions/@name = $node/@name - and $production-name = x:left-nonterminals($node/@name, (), $epsilon-nonterminals, $parser-productions) - - and $x:enable - and (not($x:direct-recursion-only) or matches($node/@name, concat("^", $production-name, "_+[1-9][0-9]*$"))) -}; - -(:~ - : Remove left-recursion from production. - : - : @param $p the production. - : @return the production, with left-recursion removed. - :) -declare function x:remove-left-recursion($p0 as element(g:production), - $p as element(g:production), - $parser-productions as element(g:production)*, - $epsilon-nonterminals as xs:string*) as element(g:production) -{ - let $name := data($p/@name) - let $recursions := $p//g:ref[x:is-indirectly-recursive($name, ., $parser-productions, $epsilon-nonterminals)] - return - if (empty($recursions)) then - $p - else - let $alternatives := x:bnf-alternatives($p) - let $rewritten-alternatives := - for $alternative in $alternatives - let $items := n:unwrap-sequence($alternative) - let $ref := ($items/self::g:ref[x:is-indirectly-recursive($name, ., $parser-productions, $epsilon-nonterminals)])[1] - return - if (empty($ref)) then - $alternative - else - for $replacement in x:bnf-alternatives($parser-productions[@name = $ref/@name]) - return - n:wrap-sequence - (( - $items[. << $ref], - n:unwrap-sequence($replacement), - $items[. >> $ref] - )) - let $rewritten-production := - element g:production - { - $p/@*, - if (count($rewritten-alternatives) <= 1) then - n:unwrap-sequence($rewritten-alternatives) - else - element g:choice{$rewritten-alternatives} - } - let $rewritten-productions := ($parser-productions[@name != $name], $rewritten-production) - return - if (count(x:left-nonterminals($name, (), $epsilon-nonterminals, $rewritten-productions)) - >= count(x:left-nonterminals($name, (), $epsilon-nonterminals, $parser-productions))) then - $p0 - else - x:remove-left-recursion($p0, $rewritten-production, $rewritten-productions, $epsilon-nonterminals) -}; - -(:~ - : Normalize a grammar to EBNF, by rewriting into a normalized grammar. - : - : @param $grammar the grammar. - : @param $generated-name-introducer the generated name introducer. - : @return the normalized grammar. - :) -declare function x:generated-name-introducer($grammar as element(g:grammar), $proposal as xs:string) as xs:string -{ - if (not($proposal != "")) then - x:generated-name-introducer($grammar, "_") - else if (some $name in ($grammar//g:production/@name, $grammar//g:ref/@name) - satisfies x:is-generated-name($name, $proposal)) then - x:generated-name-introducer($grammar, concat($proposal, $proposal)) - else - $proposal -}; - -declare function x:is-generated-name($name as xs:string, $generated-name-introducer as xs:string) as xs:boolean -{ - matches($name, concat($generated-name-introducer, "[1-9][0-9]*$")) -}; - -(:~ - :) -declare function x:is-epsilon-sequence($nodes as node()*, $epsilon-nonterminals) as xs:boolean -{ - every $node in $nodes - satisfies $node/self::g:ref[empty(@context) and @name = $epsilon-nonterminals] -}; - -(:~ - :) -declare function x:is-epsilon-production($p as element(g:production), $epsilon-nonterminals) as xs:boolean -{ - if ($p/g:choice) then - some $c in $p/g:choice/* - satisfies - if ($c/self::g:sequence) then - x:is-epsilon-sequence($c/*, $epsilon-nonterminals) - else - x:is-epsilon-sequence($c, $epsilon-nonterminals) - else - x:is-epsilon-sequence($p/*, $epsilon-nonterminals) -}; - -(:~ - :) -declare function x:epsilon-nonterminals($result as xs:string*, - $parser-productions as element(g:production)*) as xs:string* -{ - let $new-result := - for $p in $parser-productions[not(@name = $result)] - where x:is-epsilon-production($p, $result) - return data($p/@name) - return - if (empty($new-result)) then - $result - else - x:epsilon-nonterminals(($result, $new-result), $parser-productions) -}; - -(:~ - :) -declare function x:epsilon-nonterminals($parser-productions as element(g:production)*) as xs:string* -{ - x:epsilon-nonterminals((), $parser-productions) -}; - -(:~ - : Remove left-recursion from grammar. - : - : @param $ast the grammar. - : @return the grammar, without left-recursive productions - : transformed to equivalent EBNF constructs. - :) -declare function x:remove-left-recursion($ast as element(g:grammar)) as element(g:grammar) -{ - let $parser-productions := x:parser-productions($ast) - let $inline := - for $p in $parser-productions - let $nt := data($p/@name) - let $other-ref := $parser-productions[@name ne $nt]//g:ref[empty(@context) and @name = $nt] - let $self-ref := $parser-productions[@name eq $nt]//g:ref[empty(@context) and @name = $nt] - let $left-recursive-ref := x:left-recursive-refs($p) - where count($other-ref) eq 1 - and exists($left-recursive-ref) - and count($left-recursive-ref) eq count($self-ref) - return $nt - - let $generated-name-introducer := x:generated-name-introducer($ast, "_") - let $bnf := if (x:is-bnf($ast)) then $ast else x:bnf($ast, $generated-name-introducer) - let $parser-productions := x:parser-productions($bnf) - let $epsilon-nonterminals := x:epsilon-nonterminals($parser-productions) - let $ebnf := x:ebnf($bnf, $generated-name-introducer) - let $inline-productions := - for $p in x:parser-productions($ebnf) - where empty($p/@context) - and $p/@name = $inline - and empty(x:left-recursive-refs($p)) - return $p - - let $ebnf := x:inline($ebnf, $inline-productions) - return $ebnf -}; - -(:~ - : Remove direct right-recursion from grammar. - : - : @param $ast the grammar. - : @return the grammar, without left- and right-recursive productions - : transformed to equivalent EBNF constructs. - :) -declare function x:remove-right-recursion($ast as node()) as node() -{ - let $reversed := n:reverse($ast) - let $removed := x:remove-left-recursion($reversed) - return - if (deep-equal($reversed, $removed)) then - $ast - else - n:reverse($removed) -}; - -(:~ - : Apply factoring to grammar. - : - : @param $grammar the grammar. - : @param $factoring the factoring options, i.e. "none", "left-only", "full-left", "right-only", "full-right" - : "left-right", or "right-left". - : @return the transformed grammar. - :) -declare function x:factorize($grammar as element(g:grammar), - $factoring as xs:string) as element(g:grammar) -{ - if ($factoring = "none") then - $grammar - else - let $g3 := n:normalize($grammar) - let $g4 := - n:denormalize - ( - n:introduce-separators - ( - if ($factoring = "left-only") then - x:left-factorize($g3) - else if ($factoring = "right-only") then - x:right-factorize($g3) - else if ($factoring = "full-left") then - x:left-factorize(x:right-factorize(x:left-factorize($g3))) - else if ($factoring = "full-right") then - x:right-factorize(x:left-factorize(x:right-factorize($g3))) - else - error(xs:QName("x:factorize"), concat("invalid argument: $factoring: ", $factoring)) - ) - ) - return - if (deep-equal($grammar, $g4)) then - $grammar - else - x:factorize($g4, $factoring) -}; - -(:~ - : Remove recursion from grammar. - : - : @param $grammar the grammar. - : @param $recursion-removal the removal options, i.e. "left" and/or "right". - : @return the transformed grammar. - :) -declare function x:remove-recursion($grammar as element(g:grammar), - $recursion-removal as xs:string*) as element(g:grammar) -{ - if (empty($recursion-removal[not(. = ("left", "right"))])) then - let $g1 := if ($recursion-removal = "left") then x:remove-left-recursion($grammar) else $grammar - let $g2 := if ($recursion-removal = "right") then x:remove-right-recursion($g1) else $g1 - return $g2 - else - error(xs:QName("x:remove-recursion"), concat("invalid argument: $recursion-removal: ", string-join($recursion-removal, ", "))) -}; - -(:~ - : Normalize a sequence of nodes to BNF, by rewriting the sequence. This - : overload is for everything below g:production. - : - : @param $done the result of previous recursion levels. - : @param $todo the sequence of nodes to be normalized to BNF. - : @param $subrules the sequence of all subrule elements of the parent - : production. - : @param $generated-name-introducer the generated name introducer. - : @return the grammar, rewritten to BNF. - :) -declare function x:bnf($done as node()*, $todo as node()*, $subrules as node()*, $generated-name-introducer as xs:string) as node()* -{ - if (empty($todo)) then - $done - else - let $node := $todo[1] - return - x:bnf - ( - ( - $done, - if (not($node instance of element(g:optional)) and - not($node instance of element(g:choice)) and - not($node instance of element(g:orderedChoice)) and - not($node instance of element(g:zeroOrMore)) and - not($node instance of element(g:oneOrMore)) and - not($node instance of element(g:subtract))) then - $node - else if ($subrules[. is $node]) then - element g:ref {x:subrule-name($node, $generated-name-introducer, n:index-of-node($subrules, $node))} - else - x:subrule-body($node/ancestor::g:production/@name, $node, $subrules, $generated-name-introducer) - ), - $todo[position() > 1], - $subrules, - $generated-name-introducer - ) -}; - -(:~ - : Normalize a sequence of nodes to BNF, by rewriting the sequence. This - : overload is for everything above and including g:production. - : - : @param $nodes the sequence of nodes to be normalized to BNF. - : @param $generated-name-introducer the generated name introducer. - : @return the normalized nodes. - :) -declare function x:bnf($nodes as node()*, $generated-name-introducer as xs:string) as node()* -{ - for $node in $nodes - return - typeswitch ($node) - case element(g:grammar) return - let $grammar := n:group-productions-by-nonterminal($node) - return - element g:grammar - { - $grammar/@*, - let $end := $grammar/processing-instruction()[local-name(.) = ("TOKENS", "ENCORE") and string(.) eq ""][1] - return - ( - x:bnf(n:children($grammar)[not(. is $end or . >> $end)], $generated-name-introducer), - $end, - $grammar/node()[. >> $end] - ) - } - case element(g:production) return - let $subrules := - for $subrule in - $node//(g:optional[empty(parent::g:choice)], - g:choice[not(deep-equal(., n:children(parent::g:optional)))], - g:orderedChoice, - g:oneOrMore, - g:zeroOrMore, - g:subtract) - where not(deep-equal($subrule, n:children($subrule/parent::g:production))) - return $subrule - return - ( - element g:production {$node/@*, x:bnf((), n:children($node), $subrules, $generated-name-introducer)}, - for $subrule at $i in $subrules - let $name := x:subrule-name($subrule, $generated-name-introducer, $i) - return - element g:production - { - attribute whitespace-spec {"explicit"}[$subrule/ancestor::g:production/@whitespace-spec = ("explicit", "definition")], - $name, - x:subrule-body($name, $subrule, $subrules, $generated-name-introducer) - } - ) - case processing-instruction() return - $node - default return - error(xs:QName("x:bnf"), string-join(("invalid node type", string(node-name($node)), string($node)), " ")) -}; - -(:~ - : Construct a temporary name for a nonterminal. The name is created from - : the production name surrounding the given element, the generated name - : introducer, and a sequence number. - : - : @param $node the node causing subrule generation. - : @param $generated-name-introducer the generated name introducer. - : @param $i the sequence number. - : @return the constructed subrule name. - :) -declare function x:subrule-name($node as element(), $generated-name-introducer as xs:string, $i as xs:integer) as attribute(name) -{ - attribute name - { - concat - ( - $node/ancestor-or-self::g:production/@name, - $generated-name-introducer, - string($i) - ) - } -}; - -(:~ - : Create a sequence of productions from complex elements moved out - : of a parent production during BNF normalization. - : - : @param $name the production name. - : @param $subrule the current to-be-extracted operator. - : @param $subrules the sequence of all extracted operators. - : @param $generated-name-introducer the generated name introducer. - : @return the sequence of productions. - :) -declare function x:subrule-body($name as attribute(name), - $subrule as element(), - $subrules as element()*, - $generated-name-introducer as xs:string) as element() -{ - if ($subrule/self::g:optional) then - element g:choice - { - element g:sequence {}, - let $children := n:children($subrule) - return - if (deep-equal($children, $subrule/g:choice)) then - for $case in n:children($children) - return n:wrap-sequence(x:bnf((), n:unwrap-sequence($case), $subrules, $generated-name-introducer)) - else - n:wrap-sequence(x:bnf((), $children, $subrules, $generated-name-introducer)) - } - else if ($subrule/self::g:choice) then - element g:choice - { - $subrule/@*, - for $case in n:children($subrule) - return - if ($case/self::g:optional) then - ( - element g:sequence {}, - n:wrap-sequence(x:bnf((), n:children($case), $subrules, $generated-name-introducer)) - ) - else - n:wrap-sequence(x:bnf((), n:unwrap-sequence($case), $subrules, $generated-name-introducer)) - } - else if ($subrule/self::g:subtract or $subrule/self::g:orderedChoice) then - element {node-name($subrule)} - { - $subrule/@*, - for $child in n:children($subrule) - return n:wrap-sequence(x:bnf((), n:unwrap-sequence($child), $subrules, $generated-name-introducer)) - } - else if ($subrule/self::g:zeroOrMore) then - element g:choice - { - element g:sequence {}, - n:wrap-sequence - (( - element g:ref {$name}, - x:bnf((), n:children($subrule), $subrules, $generated-name-introducer) - )) - } - else if ($subrule/self::g:oneOrMore) then - let $bnf := x:bnf((), n:children($subrule), $subrules, $generated-name-introducer) - return - element g:choice - { - n:wrap-sequence($bnf), - n:wrap-sequence((element g:ref {$name}, $bnf)) - } - else - error(xs:QName("x:subrule-body"), string-join(("invalid node type", string(node-name($subrule)), string($subrule)), " ")) -}; - -(:~ - : Get alternatives from a sequence of production (of one nonterminal). - : - : @param $productions the productions. - : @return the sequence of alternatives. - :) -declare function x:bnf-alternatives($productions as element(g:production)*) -{ - for $p in $productions - return - if ($p/*[last() = 1]/self::g:choice) then - $p/*/* - else - n:wrap-sequence($p/*) -}; - -(:~ - : Normalize a sequence of nodes to EBNF, by rewriting into a normalized - : node sequence. This overload is for everything below g:grammar. - : - : @param $nodes the sequence of nodes. - : @param $single-reference-nonterminals the set of nonterminals productions - : that will be inlined to their reference context. - : @param $self-reference the reference currently being integrated. This serves - : for suppressing recursive calls, as they become Kleene operators in the - : reference context. - : @return the normalized sequence. - :) -declare function x:ebnf($nodes as node()*, - $single-reference-nonterminals as element(g:production)*, - $self-reference as element(g:ref)* - ) as node()* -{ - for $node in $nodes - return - typeswitch ($node) - case element(g:production) return - if ($node/@name = $single-reference-nonterminals/@name) then - () - else - element g:production - { - $node/@*, - let $left-recursive-ref := - x:left-recursive-refs($node) - [parent::g:sequence/parent::g:choice/parent::g:production] - let $self-ref := $node//g:ref[empty(@context) and @name eq $node/@name and - empty(ancestor::*[. >> $node and not(parent::g:choice) and following-sibling::*])] - return - x:ebnf - ( - n:children($node), - $single-reference-nonterminals, - $self-ref[count($left-recursive-ref) eq count($self-ref)] - ) - } - case element(g:choice) return - let $children := n:children($node) - let $empty := $children/self::g:sequence[empty(n:children(.))] - return - if (empty($self-reference)) then - if (exists($empty)) then - element g:optional - { - if (count($children) - count($empty) eq 1) then - for $child in $children - where empty($empty[. is $child]) - return x:ebnf(n:unwrap-sequence($child), $single-reference-nonterminals, ()) - else - n:choice - (( - for $child in $children - where empty($empty[. is $child]) - return n:wrap-sequence(x:ebnf(n:unwrap-sequence($child), $single-reference-nonterminals, ())) - )) - } - else - element g:choice - { - $node/@*, - for $child in $children - return n:wrap-sequence(x:ebnf(n:unwrap-sequence($child), $single-reference-nonterminals, $self-reference)) - } - else - let $anchor := $children[not(.//g:ref[empty(@context) and @name = $self-reference/@name])] - let $recursive := $children[ .//g:ref[empty(@context) and @name = $self-reference/@name] ] - let $seed := - if (count($anchor) eq 1) then - x:ebnf(n:unwrap-sequence($anchor), $single-reference-nonterminals, $self-reference) - else - element g:choice - { - $node/@*, - for $child in $anchor - return n:wrap-sequence(x:ebnf(n:unwrap-sequence($child), $single-reference-nonterminals, $self-reference)) - } - let $loop := - if (count($recursive) eq 1) then - x:ebnf(n:unwrap-sequence($recursive), $single-reference-nonterminals, $self-reference) - else - element g:choice - { - $node/@*, - for $child in $recursive - return n:wrap-sequence(x:ebnf(n:unwrap-sequence($child), $single-reference-nonterminals, $self-reference)) - } - return - if (deep-equal($seed, $loop)) then - element g:oneOrMore {$loop} - else - ($seed, element g:zeroOrMore {$loop}) - case element(g:orderedChoice) return - element {node-name($node)} - { - $node/@*, - for $child in n:children($node) - let $sequence := n:unwrap-sequence($child) - return n:wrap-sequence(x:ebnf($sequence, $single-reference-nonterminals, $self-reference)) - } - case element(g:ref) return - if ($node/@name = $self-reference/@name) then - () - else if ($node/@context or not($node/@name = $single-reference-nonterminals/@name)) then - $node - else - let $definition := $single-reference-nonterminals[@name eq $node/@name] - let $self-reference := $definition//g:ref[empty(@context) and @name eq $node/@name] - return x:ebnf(n:children($definition), $single-reference-nonterminals, $self-reference) - case element() return - element {node-name($node)} - { - $node/@*, - x:ebnf($node/node(), $single-reference-nonterminals, $self-reference) - } - default return - $node -}; - -(:~ - : Normalize a grammar to EBNF, by rewriting into a normalized grammar. - : - : @param $grammar the grammar. - : @param $generated-name-introducer the generated name introducer. - : @return the normalized grammar. - :) -declare function x:ebnf($grammar as element(g:grammar), $generated-name-introducer as xs:string) as element(g:grammar) -{ - let $nonterminals := x:parser-productions($grammar) - let $single-reference-nonterminals := $nonterminals[x:is-generated-name(@name, $generated-name-introducer)] - let $end := n:syntax-end($grammar) - let $ebnf := - x:combine-optional-oneOrMore - ( - x:establish-oneOrMore - ( - element g:grammar - { - $grammar/@*, - x:ebnf - ( - n:children($grammar)[not(. is $end or . >> $end)], - $single-reference-nonterminals, - () - ), - $end, - $grammar/node()[. >> $end] - } - ) - ) - return - if (deep-equal($ebnf, $grammar)) then - $grammar - else - x:ebnf($ebnf, $generated-name-introducer) -}; - -(:~ - : Expand $result by any nonterminal names that occur at the left hand side - : of productions that have names matching $nonterminal. $result is an - : accumulator variable, thus initial calls should pass an empty sequence. - : - : @param $nonterminal the nonterminals - : @param $result the result accumulator. - : @param $empty the empty flag accumulator. - : @param $parser-productions the complete set of available syntax productions. - : @return the sequence of nonterminal names occurring at the very left - : of corresponding productions. - :) -declare function x:left-nonterminals($nonterminal as xs:string*, - $result as xs:string*, - $empty as xs:string*, - $parser-productions as element(g:production)*) as xs:string* -{ - let $left := x:direct-left-nonterminals($nonterminal, $empty, $parser-productions)[not(. = $result)] - return - if (empty($left)) then - $result - else - x:left-nonterminals($left, ($result, $left), $empty, $parser-productions) -}; - -(:~ - : Apply single-production right-factoring transformation to node $ast. - : - : @param $ast the grammar to be transformed. - : @return the transformed grammar. - :) -declare function x:right-factorize($ast as node()) as node() -{ - let $reversed := n:reverse($ast) - let $right-factored := x:left-factorize($reversed) - return - if (deep-equal($reversed, $right-factored)) then - $ast - else - n:reverse($right-factored) -}; - -(:~ - : - :) -declare function x:direct-left-nonterminals($nonterminal as xs:string*, - $empty as xs:string*, - $parser-productions as element(g:production)*) as xs:string* -{ - distinct-values - ( - for $n in $nonterminal - let $p := $parser-productions[@name eq $n] - for $r in - ( - $p/g:ref[empty(preceding-sibling::*[not(self::g:ref) or not(@name = $empty)])], - $p/g:choice/g:ref, - $p/g:choice/g:sequence/g:ref[empty(preceding-sibling::*[not(self::g:ref) or not(@name = $empty)])] - ) - return $r/@name[. = $parser-productions/@name and empty($r/@context)] - ) -}; - -(:~ - : Apply single-production left-factoring transformation to nodes $todo. - : - : @param $todo the nodes to be transformed. - : @param $done the intermediate result, that was calculated in preceding - : recursion levels of this tail-recursive transformation. - : @return the transformed nodes. - :) -declare function x:left-factorize($todo as node()*, $done as node()*) as node()* -{ - if (empty($todo)) then - $done - else if ($todo[1]/self::g:choice and count($todo[1]/*) gt $x:alternative-limit) then - x:left-factorize($todo[position() gt 1], ($done, $todo[1])) - else - let $node := $todo[1] - let $children := n:children($node) - let $left-factor := - if (not($node/self::g:choice)) then - () - else - ( - for $c in $children - let $case := n:unwrap-sequence($c)[1] - where - some $d in $children[. << $c] - satisfies deep-equal($case, n:unwrap-sequence($d)[1]) - return $case - )[1] - let $left-factor-choice := - if ($left-factor or not($node/self::g:choice)) then - () - else - ( - for $c in $children - let $case := n:unwrap-sequence($c)[1] - where $case/self::g:choice - and - ( - every $subcase in n:children($case) - satisfies - some $d in $children - satisfies deep-equal($subcase, $d) - ) - return $case - )[1] - -(: - (: A | B+ D | (B+|) E | F - => A | (B+|)B D | (B+|) E | F - :) - - find prefix of: B+ - use it to create this: (B+|) - and verify prefix exists: (B+|) - if so, replace B+ - by (B+|)B - - do not flatten or the like, and have the left-factor rule catch - it in the next step. -:) - let $left-factor-oom := - if ($left-factor or $left-factor-choice or not($node/self::g:choice)) then - () - else - ( - for $c in $children - let $oneOrMore := n:unwrap-sequence($c)[1] - - where $oneOrMore/self::g:oneOrMore - return - let $hs := n:children($oneOrMore) - let $choice := - element g:choice - { - $oneOrMore, - n:wrap-sequence(()) - } - where - some $d in $children - satisfies deep-equal($choice, n:unwrap-sequence($d)[1]) - return ($oneOrMore, n:wrap-sequence(($choice, $hs))) - )[position() le 2] - - let $single-child := if (count($children) eq 1) then $children else () - let $empty := $children/self::g:sequence[empty(n:children(.))] - let $non-empty := $children[not(. is $empty)] - return - if (exists($left-factor)) then - - (: (A|B C|B D|E) => (A|B(C|D)|E):) - - let $choice := - let $factored := - for $c at $i in $children - let $elements := n:unwrap-sequence($c) - where deep-equal($left-factor, $elements[1]) - return $i - let $cases := - ( - $children[position() lt $factored[1]], - n:wrap-sequence - (( - $left-factor, - n:choice - ( - for $c at $i in $children[position() = $factored] - return n:wrap-sequence(n:unwrap-sequence($c)[position() gt 1]) - ) - )), - for $c at $i in $children - where not($i = $factored) and $i gt $factored[1] - return $c - ) - return n:choice($cases) - return - x:left-factorize(($choice, $todo[position() gt 1]), $done) - - else if (exists($left-factor-choice)) then - - (: (A|(B|C)D|B|C|E) => (A|(B|C)D|(B|C)|E) :) - - let $choice := - let $factored := - for $c at $i in $children - where some $d in n:children($left-factor-choice) satisfies deep-equal($d, $c) - return $i - let $cases := - ( - $children[position() lt $factored[1]], - $left-factor-choice, - $children[not(position() = $factored) and position() gt $factored[1]] - ) - return element g:choice {$cases} (: no flattening here :) - return - x:left-factorize(($choice, $todo[position() gt 1]), $done) - - else if (exists($left-factor-oom)) then - - let $choice := - element g:choice (: no flattening here :) - { - for $c at $i in $children - let $elements := n:unwrap-sequence($c) - return - if (deep-equal($left-factor-oom[1], $elements[1])) then - n:wrap-sequence((n:unwrap-sequence($left-factor-oom[2]), $elements[position() gt 1])) - else - $c - } - return x:left-factorize(($choice, $todo[position() gt 1]), $done) - - else if (count($children) eq 2 - and $node/self::g:choice - and exists($empty) - and n:is-sequence-item($node) - and exists(n:unwrap-sequence($non-empty)) - and deep-equal(n:unwrap-sequence($non-empty)[1], $todo[2])) then - - (: (A B|) A => A (B A|) :) - - x:left-factorize - ( - ( - $todo[2], - n:choice - (( - $empty[. << $non-empty], - n:wrap-sequence - (( - n:unwrap-sequence($non-empty)[position() gt 1], - $todo[2] - )), - $empty[. >> $non-empty] - )), - $todo[position() gt 2] - ), - $done - ) - - else if ($node/self::g:oneOrMore - and n:is-sequence-item($node) - and exists($children) - and deep-equal($children[1], $todo[2])) then - - (: (A B)+ A => A (B A)+ :) - - x:left-factorize - ( - ( - $todo[2], - element g:oneOrMore - { - $children[position() gt 1], - $todo[2] - }, - $todo[position() gt 2] - ), - $done - ) - - else if ($node/self::g:choice - and $children[1]/self::g:oneOrMore - and $children[2]/self::g:sequence[empty(n:children(.))] - and n:is-sequence-item($node) - and exists(n:children($children[1])) - and deep-equal(n:children($children[1])[1], $todo[2])) then - - (: ((A B)+|) A => A ((B A)+|) :) - - x:left-factorize - ( - ( - $todo[2], - element g:choice - { - element g:oneOrMore - { - n:children($children[1])[position() gt 1], - $todo[2] - }, - $children[2] - }, - $todo[position() gt 2] - ), - $done - ) - - else if (empty($children)) then - - x:left-factorize($todo[position() gt 1], ($done, $node)) - - else - - x:left-factorize - ( - $todo[position() gt 1], - ( - $done, - element {node-name($node)} - { - $node/@*, - if (n:is-sequence-item($children)) then - x:left-factorize($children, ()) - else - for $c in $children - return n:wrap-sequence(x:left-factorize(n:unwrap-sequence($c), ())) - } - ) - ) -}; - -(:~ - : Apply single-production left-factoring transformation to node $ast. - : - : @param $ast the grammar to be transformed. - : @return the transformed grammar. - :) -declare function x:left-factorize($ast as node()) as node() -{ - let $left-factored := x:left-factorize($ast, ()) - return - if (deep-equal($ast, $left-factored)) then - $ast - else - x:left-factorize($left-factored) -}; - -(:~ - : Create g:oneOrMore operators from a matching g:zeroOrMore, i.e. replace - : A A* or A* A by A+. Used during EBNF normalization. - : - : @param $node the node to be rewritten. - : @return the rewritten node. - :) -declare function x:establish-oneOrMore($node as node()) as node()* -{ - let $children := n:children($node) - let $zeroOrMore := ($children/self::g:zeroOrMore)[1] - let $zeroOrMore-index := if (empty($zeroOrMore)) then () else n:index-of-node($children, $zeroOrMore) - let $zeroOrMore-children := n:children($zeroOrMore) - let $zeroOrMore-children-count := count($zeroOrMore-children) - return - if (empty($children)) then - $node - else if (exists($zeroOrMore) - and deep-equal($zeroOrMore-children, - $children[ position() >= $zeroOrMore-index - $zeroOrMore-children-count - and position() < $zeroOrMore-index])) then - x:establish-oneOrMore - ( - element {node-name($node)} - { - $node/@*, - for $node in $children[position() < $zeroOrMore-index - $zeroOrMore-children-count] - return x:establish-oneOrMore($node), - x:establish-oneOrMore(element g:oneOrMore{$zeroOrMore-children}), - for $node in $children[position() > $zeroOrMore-index] - return x:establish-oneOrMore($node) - } - ) - else if (exists($zeroOrMore) - and deep-equal($zeroOrMore-children, - $children[ position() > $zeroOrMore-index - and position() <= $zeroOrMore-index + $zeroOrMore-children-count])) then - x:establish-oneOrMore - ( - element {node-name($node)} - { - $node/@*, - for $node in $children[position() lt $zeroOrMore-index] - return x:establish-oneOrMore($node), - x:establish-oneOrMore(element g:oneOrMore{$zeroOrMore-children}), - for $node in $children[position() gt $zeroOrMore-index + $zeroOrMore-children-count] - return x:establish-oneOrMore($node) - } - ) - else - element {node-name($node)} - { - $node/@*, - for $node in $children - return x:establish-oneOrMore($node) - } -}; - -(:~ - : Create g:zeroOrMore operators from a matching g:optional with a single - : g:zeroOrMore child, i.e. replace - : (A+)? by A*. Used during EBNF normalization. - : - : @param $node the node to be rewritten. - : @return the rewritten node. - :) -declare function x:combine-optional-oneOrMore($node as node()) as node()* -{ - if ($node/self::g:optional and $node/g:oneOrMore and count(n:children($node)) eq 1) then - element g:zeroOrMore - { - for $child in n:children(n:children($node)) return x:combine-optional-oneOrMore($child) - } - else if ($node/self::element()) then - element {node-name($node)} - { - $node/@*, - for $child in $node/node() return x:combine-optional-oneOrMore($child) - } - else $node -}; - -(:~ - : Determine sequence of left-recursive references within a grammar fragment. - : - : @param $nodes the grammar fragment. - : @return the sequence of left-recursive references. - :) -declare function x:left-recursive-refs($nodes as element()*) as element(g:ref)* -{ - let $node := n:unwrap-sequence($nodes)[1] - return - typeswitch ($node) - case element(g:production) return - if (count($nodes) ne 1) then - error(xs:QName("n:left-recursive-refs"), string-join(("too many arguments:", $nodes/@name), " ")) - else - x:left-recursive-refs(n:children($node)) - case element(g:optional) return - x:left-recursive-refs(n:children($node)) - case element(g:oneOrMore) return - x:left-recursive-refs(n:children($node)) - case element(g:zeroOrMore) return - x:left-recursive-refs(n:children($node)) - case element(g:choice) return - for $case in n:children($node) - return x:left-recursive-refs(n:unwrap-sequence($case)) - case element(g:ref) return - $node[empty(@context) and @name eq ancestor::g:production/@name] - default return - () -}; -(:~ - : Recursively calculate the set of nonterminals that can be reached - : from a set of start symbols. - : - : @param $nonterminals the set of all nonterminal productions. - : @param $start-symbols the set of start symbols. - : @return the subset of $nonterminals that can be reached from - : $start-symbols. - :) -declare function x:ref($nonterminals as element(g:production)*, - $start-symbols as element(g:production)* - ) as element(g:production)* -{ - let $new := $nonterminals[@name = $start-symbols//g:ref[empty(@context)]/@name] - return - if (empty($new[not(@name = $start-symbols/@name)])) then - $start-symbols - else - x:ref($nonterminals, ($start-symbols, $new)/.) -}; - (:~ : Get the sequence of parser productions from a grammar. :