Skip to content

Commit

Permalink
fix(xml/parser): Fix parsing of legacy document types (#6555)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait authored Dec 2, 2022
1 parent a9cccb0 commit cdf0d8a
Show file tree
Hide file tree
Showing 57 changed files with 1,876 additions and 0 deletions.
41 changes: 41 additions & 0 deletions crates/swc_xml_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ pub enum State {
DoctypeSystemIdentifierSingleQuoted,
DoctypeSystemIdentifierDoubleQuoted,
AfterDoctypeSystemIdentifier,
DoctypeTypeInternalSubSet,
BogusDoctype,
}

Expand Down Expand Up @@ -2245,6 +2246,12 @@ where
self.state = State::Data;
self.emit_doctype_token();
}
// U+005B LEFT SQUARE BRACKET ([)
// Switch to the doctype internal subset state.
Some(c @ '[') => {
self.append_raw_to_doctype_token(c);
self.state = State::DoctypeTypeInternalSubSet;
}
// EOF
// Parse error. Switch to the data state. Emit DOCTYPE token. Reconsume the EOF
// character.
Expand Down Expand Up @@ -2825,6 +2832,12 @@ where
self.state = State::Data;
self.emit_doctype_token();
}
// U+005B LEFT SQUARE BRACKET ([)
// Switch to the doctype internal subset state.
Some(c @ '[') => {
self.append_raw_to_doctype_token(c);
self.state = State::DoctypeTypeInternalSubSet;
}
// EOF
// Parse error. Switch to the data state. Emit DOCTYPE token. Reconsume the EOF
// character.
Expand All @@ -2843,6 +2856,34 @@ where
}
}
}
// DOCTYPE internal subset state: entered after a `[` is seen inside a DOCTYPE.
// The characters of the subset are not interpreted in this arm; they are only
// appended to the DOCTYPE token's raw text until a `]` is consumed.
State::DoctypeTypeInternalSubSet => {
// Consume the next input character:
match self.consume_next_char() {
// U+005D RIGHT SQUARE BRACKET (])
// Append it to the raw DOCTYPE text and switch to the after DOCTYPE name
// state.
// NOTE(review): every `]` matches here, so a `]` inside a quoted literal
// of the subset would also leave this state — confirm whether that is
// acceptable (see the TODO below).
Some(c @ ']') => {
self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypeName;
}
// EOF
// Parse error. Switch to the data state. Emit DOCTYPE token. Reconsume the EOF
// character.
None => {
self.emit_error(ErrorKind::EofInDoctype);
self.state = State::Data;
self.emit_doctype_token();
self.reconsume();
}
// Anything else
// Validate the current input character and append it to the current DOCTYPE
// token's raw text.
Some(c) => {
// TODO improve parse legacy declarations
self.validate_input_stream_character(c);
self.append_raw_to_doctype_token(c);
}
}
}
State::BogusDoctype => {
// Consume the next input character:
match self.consume_next_char() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
| <!DOCTYPE svg>
| <root>
| "test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!DOCTYPE svg [
<!ENTITY Pub-Status "This is a pre-release of the
specification.">
]>
<root>test</root>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"type": "Document",
"span": {
"start": 1,
"end": 112,
"ctxt": 0
},
"children": [
{
"type": "DocumentType",
"span": {
"start": 1,
"end": 94,
"ctxt": 0
},
"name": "svg",
"publicId": null,
"systemId": null,
"raw": "<!DOCTYPE svg [\n <!ENTITY Pub-Status \"This is a pre-release of the\nspecification.\">\n]>"
},
{
"type": "Element",
"span": {
"start": 95,
"end": 112,
"ctxt": 0
},
"tagName": "root",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 101,
"end": 105,
"ctxt": 0
},
"data": "test",
"raw": "test"
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

x Document
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY Pub-Status "This is a pre-release of the
3 | | specification.">
4 | | ]>
5 | `-> <root>test</root>
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY Pub-Status "This is a pre-release of the
3 | | specification.">
4 | `-> ]>
5 | <root>test</root>
`----

x DocumentType
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY Pub-Status "This is a pre-release of the
3 | | specification.">
4 | `-> ]>
5 | <root>test</root>
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:4:1]
4 | ]>
5 | <root>test</root>
: ^^^^^^^^^^^^^^^^^
`----

x Element
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:4:1]
4 | ]>
5 | <root>test</root>
: ^^^^^^^^^^^^^^^^^
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:4:1]
4 | ]>
5 | <root>test</root>
: ^^^^
`----

x Text
,-[$DIR/tests/fixture/document_type/internal_subset-1/input.xml:4:1]
4 | ]>
5 | <root>test</root>
: ^^^^
`----
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
| <!DOCTYPE svg>
| <root>
| "test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!DOCTYPE svg [
<!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
]>
<root>test</root>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"type": "Document",
"span": {
"start": 1,
"end": 127,
"ctxt": 0
},
"children": [
{
"type": "DocumentType",
"span": {
"start": 1,
"end": 109,
"ctxt": 0
},
"name": "svg",
"publicId": null,
"systemId": null,
"raw": "<!DOCTYPE svg [\n <!ENTITY open-hatch SYSTEM \"http://www.textuality.com/boilerplate/OpenHatch.xml\">\n]>"
},
{
"type": "Element",
"span": {
"start": 110,
"end": 127,
"ctxt": 0
},
"tagName": "root",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 116,
"end": 120,
"ctxt": 0
},
"data": "test",
"raw": "test"
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

x Document
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
3 | | ]>
4 | `-> <root>test</root>
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
3 | `-> ]>
4 | <root>test</root>
`----

x DocumentType
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:1:1]
1 | ,-> <!DOCTYPE svg [
2 | | <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
3 | `-> ]>
4 | <root>test</root>
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:3:1]
3 | ]>
4 | <root>test</root>
: ^^^^^^^^^^^^^^^^^
`----

x Element
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:3:1]
3 | ]>
4 | <root>test</root>
: ^^^^^^^^^^^^^^^^^
`----

x Child
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:3:1]
3 | ]>
4 | <root>test</root>
: ^^^^
`----

x Text
,-[$DIR/tests/fixture/document_type/internal_subset-2/input.xml:3:1]
3 | ]>
4 | <root>test</root>
: ^^^^
`----
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
| <!DOCTYPE svg>
| <root>
| "test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!DOCTYPE svg [
<!ENTITY open-hatch
PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN"
"http://www.textuality.com/boilerplate/OpenHatch.xml">
]>
<root>test</root>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"type": "Document",
"span": {
"start": 1,
"end": 205,
"ctxt": 0
},
"children": [
{
"type": "DocumentType",
"span": {
"start": 1,
"end": 187,
"ctxt": 0
},
"name": "svg",
"publicId": null,
"systemId": null,
"raw": "<!DOCTYPE svg [\n <!ENTITY open-hatch\n PUBLIC \"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"\n \"http://www.textuality.com/boilerplate/OpenHatch.xml\">\n]>"
},
{
"type": "Element",
"span": {
"start": 188,
"end": 205,
"ctxt": 0
},
"tagName": "root",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 194,
"end": 198,
"ctxt": 0
},
"data": "test",
"raw": "test"
}
]
}
]
}
Loading

1 comment on commit cdf0d8a

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: cdf0d8a Previous: 4d422b6 Ratio
es/full/bugs-1 347600 ns/iter (± 21107) 368141 ns/iter (± 28973) 0.94
es/full/minify/libraries/antd 2036833240 ns/iter (± 65299279) 2127516739 ns/iter (± 48051678) 0.96
es/full/minify/libraries/d3 449921640 ns/iter (± 15545822) 473217805 ns/iter (± 14215411) 0.95
es/full/minify/libraries/echarts 1747869817 ns/iter (± 284198529) 1855852465 ns/iter (± 57373701) 0.94
es/full/minify/libraries/jquery 114051503 ns/iter (± 2348462) 119863093 ns/iter (± 13610713) 0.95
es/full/minify/libraries/lodash 131359407 ns/iter (± 5068345) 134541593 ns/iter (± 6967645) 0.98
es/full/minify/libraries/moment 66839836 ns/iter (± 2236533) 71337634 ns/iter (± 4323384) 0.94
es/full/minify/libraries/react 22065996 ns/iter (± 609807) 23052592 ns/iter (± 792122) 0.96
es/full/minify/libraries/terser 342266503 ns/iter (± 15979735) 483160127 ns/iter (± 106913714) 0.71
es/full/minify/libraries/three 625347510 ns/iter (± 13892819) 706939080 ns/iter (± 35525999) 0.88
es/full/minify/libraries/typescript 3659254464 ns/iter (± 77550761) 3973497652 ns/iter (± 119259385) 0.92
es/full/minify/libraries/victory 909940842 ns/iter (± 27053033) 957823876 ns/iter (± 45827666) 0.95
es/full/minify/libraries/vue 172808513 ns/iter (± 13676562) 172554248 ns/iter (± 12135151) 1.00
es/full/codegen/es3 34457 ns/iter (± 2655) 35294 ns/iter (± 4214) 0.98
es/full/codegen/es5 34829 ns/iter (± 1764) 34979 ns/iter (± 2704) 1.00
es/full/codegen/es2015 34404 ns/iter (± 1268) 35621 ns/iter (± 3738) 0.97
es/full/codegen/es2016 35214 ns/iter (± 927) 35248 ns/iter (± 3078) 1.00
es/full/codegen/es2017 34752 ns/iter (± 713) 35701 ns/iter (± 3230) 0.97
es/full/codegen/es2018 34071 ns/iter (± 950) 41745 ns/iter (± 6015) 0.82
es/full/codegen/es2019 34559 ns/iter (± 1412) 44094 ns/iter (± 6960) 0.78
es/full/codegen/es2020 34200 ns/iter (± 1638) 42595 ns/iter (± 7198) 0.80
es/full/all/es3 206436493 ns/iter (± 28503818) 266040720 ns/iter (± 36387557) 0.78
es/full/all/es5 209892739 ns/iter (± 18569666) 210921843 ns/iter (± 27949437) 1.00
es/full/all/es2015 152457874 ns/iter (± 17148838) 163885422 ns/iter (± 10517983) 0.93
es/full/all/es2016 155336471 ns/iter (± 23154947) 203425543 ns/iter (± 28237050) 0.76
es/full/all/es2017 156618860 ns/iter (± 16557551) 161988108 ns/iter (± 15138523) 0.97
es/full/all/es2018 148978570 ns/iter (± 14036603) 175256956 ns/iter (± 20480053) 0.85
es/full/all/es2019 156612369 ns/iter (± 17188799) 177737811 ns/iter (± 21527209) 0.88
es/full/all/es2020 150456177 ns/iter (± 14219881) 170617122 ns/iter (± 15733403) 0.88
es/full/parser 741042 ns/iter (± 38352) 912946 ns/iter (± 127217) 0.81
es/full/base/fixer 26725 ns/iter (± 1304) 31780 ns/iter (± 4400) 0.84
es/full/base/resolver_and_hygiene 93473 ns/iter (± 5180) 100604 ns/iter (± 11805) 0.93
serialization of ast node 211 ns/iter (± 8) 229 ns/iter (± 42) 0.92
serialization of serde 222 ns/iter (± 5) 222 ns/iter (± 18) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.