-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Test for RFC 2732 URI (XSD v1.0 xs:anyURI) (#40)
In this change, we add an ABNF grammar for RFC 2732 and manually review the resulting Python code. Note that RFC 2732 is an amendment to RFC 2396; namely, it fixes the IPv4 pattern and introduces a pattern for IPv6.
- Loading branch information
Showing
3 changed files
with
135 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Regular-expression fragments for URI references as specified by RFC 2396
# with the amendments of RFC 2732 (IPv6 literals in the host part).
#
# Each name corresponds to a rule of the accompanying ABNF grammar, with the
# rule name lower-cased and "-" replaced by "_". Fragments are plain strings
# that are composed bottom-up via f-strings; the top-level pattern is
# ``uri_reference``.
#
# None of the fragments is anchored: to validate a complete string, use
# ``re.fullmatch`` (or wrap the pattern in anchors) rather than ``re.match``
# or ``re.search``.

# --- Character classes -----------------------------------------------------
alphanum = '[a-zA-Z0-9]'
mark = "[\\-_.!~*'()]"
unreserved = f'({alphanum}|{mark})'

# A single hexadecimal digit. The ABNF lists "A".."F" and "a".."f"
# separately, which previously produced every letter alternative twice; one
# character class matches exactly the same input. The capturing group is kept
# so that group numbering in the composed patterns stays unchanged.
# NOTE: this name shadows the built-in ``hex()`` inside this module.
hex = '([0-9A-Fa-f])'
escaped = f'%{hex}{hex}'

# --- Path ------------------------------------------------------------------
pchar = f'({unreserved}|{escaped}|[:@&=+$,])'
param = f'({pchar})*'
segment = f'({pchar})*(;{param})*'
path_segments = f'{segment}(/{segment})*'
abs_path = f'/{path_segments}'

# --- Scheme and authority --------------------------------------------------
scheme = '[a-zA-Z][a-zA-Z0-9+\\-.]*'
userinfo = f'({unreserved}|{escaped}|[;:&=+$,])*'
domainlabel = f'({alphanum}|{alphanum}({alphanum}|-)*{alphanum})'
# Unlike a domain label, the top label has to start with a letter.
toplabel = f'([a-zA-Z]|[a-zA-Z]({alphanum}|-)*{alphanum})'
hostname = f'({domainlabel}\\.)*{toplabel}(\\.)?'

# Purely syntactic check: one to three digits per octet, no range check.
ipv4address = '[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}'

# IPv6 literal: hex groups with an optional "::" and an optional trailing
# IPv4 address. The number of groups is not restricted by the pattern.
hex4 = '[0-9A-Fa-f]{1,4}'
hexseq = f'{hex4}(:{hex4})*'
hexpart = f'({hexseq}|{hexseq}::({hexseq})?|::({hexseq})?)'
ipv6address = f'{hexpart}(:{ipv4address})?'
ipv6reference = f'\\[{ipv6address}\\]'

host = f'({hostname}|{ipv4address}|{ipv6reference})'
port = '[0-9]*'
hostport = f'{host}(:{port})?'
# The whole server part is optional, so it also matches the empty string.
server = f'(({userinfo}@)?{hostport})?'
reg_name = f'({unreserved}|{escaped}|[$,;:@&=+])+'
authority = f'({server}|{reg_name})'
net_path = f'//{authority}({abs_path})?'

# --- Query, fragment and opaque part ---------------------------------------
# "[" and "]" are part of the reserved set (the RFC 2732 amendment).
reserved = '[;/?:@&=+$,\\[\\]]'
uric = f'({reserved}|{unreserved}|{escaped})'
query = f'({uric})*'
hier_part = f'({net_path}|{abs_path})(\\?{query})?'
uric_no_slash = f'({unreserved}|{escaped}|[;?:@&=+$,])'
opaque_part = f'{uric_no_slash}({uric})*'
absoluteuri = f'{scheme}:({hier_part}|{opaque_part})'
fragment = f'({uric})*'

# The doubled braces are f-string escapes; the resulting regex reads {1,2}.
ipv6prefix = f'{hexpart}/[0-9]{{1,2}}'
path = f'({abs_path}|{opaque_part})?'

# --- Relative references and the top-level pattern -------------------------
rel_segment = f'({unreserved}|{escaped}|[;@&=+$,])+'
rel_path = f'{rel_segment}({abs_path})?'
relativeuri = f'({net_path}|{abs_path}|{rel_path})(\\?{query})?'
# Both parts are optional, hence the empty string is a valid URI reference.
uri_reference = f'({absoluteuri}|{relativeuri})?(\\#{fragment})?'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
; Contains RFC 2396 (URI) and the amendments of RFC 2732 (IPv6).
; From: https://www.ietf.org/rfc/rfc2396.txt and
; https://www.ietf.org/rfc/rfc2732.txt. Together, they specify
; `xs:anyURI` in XSD version 1.0:
; https://www.w3.org/TR/xmlschema-2/#anyURI.

; The following changes have been made to RFC 2396:
; 1. Change the `host` non-terminal to add an IPv6 option
; 2. Replace the definition of `IPv4address` with that of RFC 2373
; 3. Add "[" and "]" to the set of `reserved` characters

; Note that the amendments to RFC 2396 (i.e., RFC 2732) refer to
; definitions of yet another RFC, namely RFC 2373, for the definitions
; of `IPv6address` and `IPv4address`. These have been taken from here:
; https://www.ietf.org/rfc/rfc2373.txt

; NOTE(review): `alpha`, `digit`, `DIGIT` and `HEXDIG` are not defined in
; this file; presumably they resolve to the ABNF core rules (rule names
; are case-insensitive in ABNF) -- confirm against the tooling in use.

alphanum      = alpha / digit

uric          = reserved / unreserved / escaped

reserved      = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" /
                "$" / "," / "[" / "]"

unreserved    = alphanum / mark

mark          = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")"

escaped       = "%" hex hex

; Both letter cases are listed as in RFC 2396.
hex           = digit / "A" / "B" / "C" / "D" / "E" / "F" /
                "a" / "b" / "c" / "d" / "e" / "f"

absoluteURI   = scheme ":" ( hier-part / opaque-part )

hier-part     = ( net-path / abs-path ) [ "?" query ]
net-path      = "//" authority [ abs-path ]
abs-path      = "/" path-segments

opaque-part   = uric-no-slash *uric

uric-no-slash = unreserved / escaped / ";" / "?" / ":" / "@" /
                "&" / "=" / "+" / "$" / ","

scheme        = alpha *( alpha / digit / "+" / "-" / "." )

authority     = server / reg-name

reg-name      = 1*( unreserved / escaped / "$" / "," /
                ";" / ":" / "@" / "&" / "=" / "+" )

server        = [ [ userinfo "@" ] hostport ]

userinfo      = *( unreserved / escaped /
                ";" / ":" / "&" / "=" / "+" / "$" / "," )

hostport      = host [ ":" port ]
host          = hostname / IPv4address / IPv6reference

; IPv6 rules, see RFC 2732 and RFC 2373.
IPv6reference = "[" IPv6address "]"
IPv6address   = hexpart [ ":" IPv4address ]
IPv6prefix    = hexpart "/" 1*2DIGIT
hexpart       = hexseq / hexseq "::" [ hexseq ] / "::" [ hexseq ]
hexseq        = hex4 *( ":" hex4)
hex4          = 1*4HEXDIG

hostname      = *( domainlabel "." ) toplabel [ "." ]
domainlabel   = alphanum / alphanum *( alphanum / "-" ) alphanum
toplabel      = alpha / alpha *( alphanum / "-" ) alphanum

IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
port          = *digit

path          = [ abs-path / opaque-part ]
path-segments = segment *( "/" segment )
segment       = *pchar *( ";" param )
param         = *pchar
pchar         = unreserved / escaped /
                ":" / "@" / "&" / "=" / "+" / "$" / ","

query         = *uric

URI-reference = [ absoluteURI / relativeURI ] [ "#" fragment ]

fragment      = *uric

relativeURI   = ( net-path / abs-path / rel-path ) [ "?" query ]

rel-path      = rel-segment [ abs-path ]
rel-segment   = 1*( unreserved / escaped /
                ";" / "@" / "&" / "=" / "+" / "$" / "," )