Skip to content

Commit

Permalink
Test for RFC 2732 URI (XSD v1.0 xs:anyURI) (#40)
Browse files Browse the repository at this point in the history
In this change, we add an ABNF grammar for
RFC 2732 and manually review the resulting Python
code.

Note that RFC 2732 is an amendment to RFC 2396;
namely, it fixes the IPv4 pattern and introduces
a pattern for IPv6.
  • Loading branch information
s-heppner authored Oct 22, 2024
1 parent 8d17547 commit 505ae94
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 0 deletions.
Empty file.
45 changes: 45 additions & 0 deletions test_data/nested-python/rfc2732/expected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Regular-expression fragments for URI references, one constant per rule of
# the accompanying ABNF grammar (RFC 2396 with the RFC 2732 amendments).
# Every constant is a plain string; later fragments embed earlier ones, so the
# definition order matters.

# --- Basic character classes -------------------------------------------------
alphanum = "[a-zA-Z0-9]"
mark = r"[\-_.!~*'()]"
unreserved = f"({alphanum}|{mark})"
# NOTE: this name shadows the builtin ``hex``; it mirrors the grammar rule
# name.  The letter classes appear twice because the grammar lists both
# "A".."F" and "a".."f" and each is rendered as a two-case class; the
# redundancy is kept verbatim.
hex = (
    "([0-9]"
    "|[aA]|[bB]|[cC]|[dD]|[eE]|[fF]"
    "|[aA]|[bB]|[cC]|[dD]|[eE]|[fF])"
)
escaped = f"%{hex}{hex}"

# --- Path components ---------------------------------------------------------
pchar = f"({unreserved}|{escaped}|[:@&=+$,])"
param = f"({pchar})*"
segment = f"({pchar})*(;{param})*"
path_segments = f"{segment}(/{segment})*"
abs_path = f"/{path_segments}"

# --- Scheme and authority ----------------------------------------------------
scheme = r"[a-zA-Z][a-zA-Z0-9+\-.]*"
userinfo = f"({unreserved}|{escaped}|[;:&=+$,])*"
domainlabel = f"({alphanum}|{alphanum}({alphanum}|-)*{alphanum})"
toplabel = f"([a-zA-Z]|[a-zA-Z]({alphanum}|-)*{alphanum})"
hostname = rf"({domainlabel}\.)*{toplabel}(\.)?"
ipv4address = r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}"
hex4 = "[0-9A-Fa-f]{1,4}"
hexseq = f"{hex4}(:{hex4})*"
hexpart = f"({hexseq}|{hexseq}::({hexseq})?|::({hexseq})?)"
ipv6address = f"{hexpart}(:{ipv4address})?"
ipv6reference = rf"\[{ipv6address}\]"
host = f"({hostname}|{ipv4address}|{ipv6reference})"
port = "[0-9]*"
hostport = f"{host}(:{port})?"
server = f"(({userinfo}@)?{hostport})?"
reg_name = f"({unreserved}|{escaped}|[$,;:@&=+])+"
authority = f"({server}|{reg_name})"
net_path = f"//{authority}({abs_path})?"

# --- Generic URI characters, query and absolute URIs -------------------------
reserved = r"[;/?:@&=+$,\[\]]"
uric = f"({reserved}|{unreserved}|{escaped})"
query = f"({uric})*"
hier_part = rf"({net_path}|{abs_path})(\?{query})?"
uric_no_slash = f"({unreserved}|{escaped}|[;?:@&=+$,])"
opaque_part = f"{uric_no_slash}({uric})*"
absoluteuri = f"{scheme}:({hier_part}|{opaque_part})"
fragment = f"({uric})*"

# --- Remaining rules and the URI reference -----------------------------------
# The doubled braces emit a literal ``{1,2}`` quantifier from the f-string.
ipv6prefix = f"{hexpart}/[0-9]{{1,2}}"
path = f"({abs_path}|{opaque_part})?"
rel_segment = f"({unreserved}|{escaped}|[;@&=+$,])+"
rel_path = f"{rel_segment}({abs_path})?"
relativeuri = rf"({net_path}|{abs_path}|{rel_path})(\?{query})?"
uri_reference = rf"({absoluteuri}|{relativeuri})?(\#{fragment})?"
90 changes: 90 additions & 0 deletions test_data/nested-python/rfc2732/grammar.abnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
; Contains RFC 2396 (URI) and the amendments of RFC 2732 (IPv6)
; From: https://www.ietf.org/rfc/rfc2396.txt and
; https://www.ietf.org/rfc/rfc2732.txt. Together, they specify
; `xs:anyURI` in XSD version 1.0:
; https://www.w3.org/TR/xmlschema-2/#anyURI.

; The following changes have been made to RFC 2396:
; 1. Change the `host` non-terminal to add an IPv6 option
; 2. Replace the definition of `IPv4address` with that of RFC 2373
; 3. Add "[" and "]" to the set of `reserved` characters

; Note that the amendments to RFC 2396 refer to definitions from yet
; another RFC, namely RFC 2373, for the definitions of `IPv6address`
; and `IPv4address`. These have been taken from here:
; https://www.ietf.org/rfc/rfc2373.txt

; Rules of the URI grammar.
; NOTE(review): `alpha`, `digit`, `DIGIT` and `HEXDIG` are not defined in this
; file; presumably they resolve to the ABNF core rules (RFC 5234, Appendix B)
; -- confirm against the parser in use.
alphanum = alpha / digit

uric = reserved / unreserved / escaped

; The reserved set includes "[" and "]".
reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" /
"$" / "," / "[" / "]"

unreserved = alphanum / mark

mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")"

; A percent sign followed by exactly two hexadecimal digits.
escaped = "%" hex hex

hex = digit / "A" / "B" / "C" / "D" / "E" / "F" /
"a" / "b" / "c" / "d" / "e" / "f"

absoluteURI = scheme ":" ( hier-part / opaque-part )

hier-part = ( net-path / abs-path ) [ "?" query ]
net-path = "//" authority [ abs-path ]
abs-path = "/" path-segments

opaque-part = uric-no-slash *uric

uric-no-slash = unreserved / escaped / ";" / "?" / ":" / "@" /
"&" / "=" / "+" / "$" / ","

scheme = alpha *( alpha / digit / "+" / "-" / "." )

authority = server / reg-name

reg-name = 1*( unreserved / escaped / "$" / "," /
";" / ":" / "@" / "&" / "=" / "+" )

; The outer brackets make the whole server part optional, so it may be empty.
server = [ [ userinfo "@" ] hostport ]

userinfo = *( unreserved / escaped /
";" / ":" / "&" / "=" / "+" / "$" / "," )

hostport = host [ ":" port ]
; A host is a hostname, an IPv4 address, or a bracketed IPv6 address.
host = hostname / IPv4address / IPv6reference

; Defined in lower case but referenced as `IPv6reference` by `host`; ABNF rule
; names are case-insensitive (RFC 5234, section 2.1).
ipv6reference = "[" IPv6address "]"
IPv6address = hexpart [ ":" IPv4address ]
; Not referenced by any other rule in this file.
IPv6prefix = hexpart "/" 1*2DIGIT
hexpart = hexseq / hexseq "::" [ hexseq ] / "::" [ hexseq ]
hexseq = hex4 *( ":" hex4)
hex4 = 1*4HEXDIG

hostname = *( domainlabel "." ) toplabel [ "." ]
domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum
toplabel = alpha / alpha *( alphanum / "-" ) alphanum

; Four dot-separated groups of one to three digits; the value range of each
; group is not constrained by this rule.
IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
; Zero or more digits, so an empty port is allowed.
port = *digit

; Not referenced by any other rule in this file.
path = [ abs-path / opaque-part ]
path-segments = segment *( "/" segment )
segment = *pchar *( ";" param )
param = *pchar
pchar = unreserved / escaped /
":" / "@" / "&" / "=" / "+" / "$" / ","

query = *uric

; An optional absolute or relative URI followed by an optional "#" fragment;
; both parts may be absent, so the empty string is derivable.
URI-reference = [ absoluteURI / relativeURI ] [ "#" fragment ]

fragment = *uric

relativeURI = ( net-path / abs-path / rel-path ) [ "?" query ]

rel-path = rel-segment [ abs-path ]
rel-segment = 1*( unreserved / escaped /
";" / "@" / "&" / "=" / "+" / "$" / "," )

0 comments on commit 505ae94

Please sign in to comment.