Skip to content

Commit

Permalink
Fixed an error #73 “HtmlMinifier.Minify hangs permanently”
Browse files Browse the repository at this point in the history
  • Loading branch information
Taritsyn committed Mar 5, 2019
1 parent 6b61171 commit 41a8460
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 62 deletions.
13 changes: 7 additions & 6 deletions WebMarkupMin.NoSamples.sln
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "html", "html", "{E16C8CAD-9
ProjectSection(SolutionItems) = preProject
test\SharedFiles\html\html-document-with-bom-at-start.html = test\SharedFiles\html\html-document-with-bom-at-start.html
test\SharedFiles\html\html-document-with-bom-in-body-tag.html = test\SharedFiles\html\html-document-with-bom-in-body-tag.html
test\SharedFiles\html\html-document-with-invalid-characters.html = test\SharedFiles\html\html-document-with-invalid-characters.html
test\SharedFiles\html\html-document-without-bom.html = test\SharedFiles\html\html-document-without-bom.html
EndProjectSection
EndProject
Expand Down Expand Up @@ -49,7 +50,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebMarkupMin.AspNetCore1",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebMarkupMin.AspNetCore2", "src\WebMarkupMin.AspNetCore2\WebMarkupMin.AspNetCore2.csproj", "{FB743652-E1C4-40B3-B499-53780DCBAA8F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebMarkupMin.Benchmarks", "test\WebMarkupMin.Benchmarks\WebMarkupMin.Benchmarks.csproj", "{213B17DB-6990-43CA-9585-A1557C6D27E8}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebMarkupMin.Benchmarks", "test\WebMarkupMin.Benchmarks\WebMarkupMin.Benchmarks.csproj", "{213B17DB-6990-43CA-9585-A1557C6D27E8}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebMarkupMin.Core.Test", "test\WebMarkupMin.Core.Test\WebMarkupMin.Core.Test.csproj", "{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}"
EndProject
Expand Down Expand Up @@ -107,14 +108,14 @@ Global
{FB743652-E1C4-40B3-B499-53780DCBAA8F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FB743652-E1C4-40B3-B499-53780DCBAA8F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FB743652-E1C4-40B3-B499-53780DCBAA8F}.Release|Any CPU.Build.0 = Release|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Release|Any CPU.Build.0 = Release|Any CPU
{213B17DB-6990-43CA-9585-A1557C6D27E8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{213B17DB-6990-43CA-9585-A1557C6D27E8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{213B17DB-6990-43CA-9585-A1557C6D27E8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{213B17DB-6990-43CA-9585-A1557C6D27E8}.Release|Any CPU.Build.0 = Release|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -135,8 +136,8 @@ Global
{27D7454A-F570-4ACC-BB7E-1DD5B264AC22} = {227F2DF0-FCC8-4548-A4F9-6DA5BE01918E}
{5F014AC3-7F5D-4EBB-850C-01513CBF1D49} = {227F2DF0-FCC8-4548-A4F9-6DA5BE01918E}
{FB743652-E1C4-40B3-B499-53780DCBAA8F} = {227F2DF0-FCC8-4548-A4F9-6DA5BE01918E}
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68} = {1AEA1D1F-45BC-4A6C-BCD8-71C50DC839BA}
{213B17DB-6990-43CA-9585-A1557C6D27E8} = {1AEA1D1F-45BC-4A6C-BCD8-71C50DC839BA}
{9125BDF2-885B-45DB-98AA-3DD8C2EA4A68} = {1AEA1D1F-45BC-4A6C-BCD8-71C50DC839BA}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {7E056B7D-640D-49E5-BFD7-D64C25744378}
Expand Down
100 changes: 50 additions & 50 deletions src/WebMarkupMin.Core/Parsers/HtmlParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,12 @@ internal sealed class HtmlParser : MarkupParserBase
#region Regular expressions for parsing tags and attributes

private static readonly Regex _xmlDeclarationRegex = new Regex(@"^<\?xml\s+[^>]+\s*\?>", RegexOptions.IgnoreCase);
private static readonly Regex _startTagRegex = new Regex(@"^<(?<tagName>" + CommonRegExps.HtmlTagNamePattern + ")" +
"(?:" +
"(?:" +
@"(?:\s+|(?<=[""']))" + CommonRegExps.HtmlAttributeNamePattern +
@"(?:\s*=\s*(?:(?:""[^""]*"")|(?:'[^']*')|[^\s""'`=<>]+)?)?" +
")" +
@"|(?:\s*(?<invalidCharacters>(?:[^/>\s][^>\s]*?)|(?:/[^>\s]*?(?!>))))" +
")*" +
@"\s*(?<emptyTagSlash>/)?>");
private static readonly Regex _startTagBeginPartRegex = new Regex(@"^<(?<tagName>" + CommonRegExps.HtmlTagNamePattern + ")");
private static readonly Regex _startTagEndPartRegex = new Regex(@"^\s*(?<emptyTagSlash>/)?>");
private static readonly Regex _endTagRegex = new Regex(@"^<\/(?<tagName>" + CommonRegExps.HtmlTagNamePattern + @")\s*>");
private static readonly Regex _attributeRegex =
new Regex(@"(?<attributeName>" + CommonRegExps.HtmlAttributeNamePattern + @")" +
new Regex(@"(?:\s+|(?<=[""']))" +
"(?<attributeName>" + CommonRegExps.HtmlAttributeNamePattern + @")" +
"(?:" +
@"\s*(?<attributeEqualSign>=)\s*" +
"(?:" +
Expand Down Expand Up @@ -533,49 +527,51 @@ private bool ProcessStartTag()
{
bool isProcessed = false;
string content = _innerContext.SourceCode;
int contentPosition = _innerContext.Position;
int contentRemainderLength = _innerContext.RemainderLength;

var match = _startTagRegex.Match(content, contentPosition, contentRemainderLength);
if (match.Success)
Match startTagBeginPartMatch = _startTagBeginPartRegex.Match(content, _innerContext.Position,
_innerContext.RemainderLength);
if (startTagBeginPartMatch.Success)
{
GroupCollection groups = match.Groups;

Group startTagNameGroup = groups["tagName"];
string startTagName = startTagNameGroup.Value;
string startTagName = startTagBeginPartMatch.Groups["tagName"].Value;
string startTagNameInLowercase = startTagName;
if (Utils.ContainsUppercaseCharacters(startTagName))
{
startTagNameInLowercase = startTagName.ToLowerInvariant();
}

Group invalidCharactersGroup = groups["invalidCharacters"];
if (invalidCharactersGroup.Success)
{
int invalidCharactersPosition = invalidCharactersGroup.Index;
int invalidCharactersOffset = invalidCharactersPosition - contentPosition;
_innerContext.IncreasePosition(startTagBeginPartMatch.Length);

_innerContext.IncreasePosition(invalidCharactersOffset);
IList<HtmlAttribute> attributes = ProcessAttributes();

throw new MarkupParsingException(
string.Format(Strings.ErrorMessage_InvalidCharactersInStartTag, startTagName),
_innerContext.NodeCoordinates, _innerContext.GetSourceFragment());
}
int currentPosition = _innerContext.Position;
int currentRemainderLength = _innerContext.RemainderLength;

int attributesPosition = startTagNameGroup.Index + startTagNameGroup.Length;
int attributesOffset = attributesPosition - contentPosition;
int startTagRemainderLength = contentPosition + match.Length - attributesPosition;
Match startTagEndPartMatch = _startTagEndPartRegex.Match(content, currentPosition,
currentRemainderLength);
if (startTagEndPartMatch.Success)
{
bool isEmptyTag = startTagEndPartMatch.Groups["emptyTagSlash"].Success;

_innerContext.IncreasePosition(attributesOffset);
ParseStartTag(startTagName, startTagNameInLowercase, attributes, isEmptyTag);

IList<HtmlAttribute> attributes = ParseAttributes(content, attributesPosition,
startTagRemainderLength, _innerContext.NodeCoordinates);
bool isEmptyTag = groups["emptyTagSlash"].Success;
_innerContext.IncreasePosition(startTagEndPartMatch.Length);
isProcessed = true;
}
else
{
int invalidCharPosition = SourceCodeNavigator.FindNextNonWhitespaceChar(content,
currentPosition, currentRemainderLength);

ParseStartTag(startTagName, startTagNameInLowercase, attributes, isEmptyTag);
int invalidCharOffset = invalidCharPosition - currentPosition;
if (invalidCharOffset > 0)
{
_innerContext.IncreasePosition(invalidCharOffset);
}

_innerContext.IncreasePosition(startTagRemainderLength);
isProcessed = true;
throw new MarkupParsingException(
string.Format(Strings.ErrorMessage_InvalidCharactersInStartTag, startTagName),
_innerContext.NodeCoordinates, _innerContext.GetSourceFragment());
}
}

return isProcessed;
Expand Down Expand Up @@ -855,23 +851,26 @@ private void ParseStartTag(string tagName, string tagNameInLowercase, IList<Html
}

/// <summary>
/// Parses a attributes
/// Processes attributes
/// </summary>
/// <param name="sourceCode">Source code</param>
/// <param name="attributesPosition">Start position of attributes</param>
/// <param name="startTagRemainderLength">Length of attributes and remaining characters in start tag</param>
/// <param name="attributesCoordinates">Attributes coordinates</param>
/// <returns>List of attributes</returns>
private IList<HtmlAttribute> ParseAttributes(string sourceCode, int attributesPosition,
int startTagRemainderLength, SourceCodeNodeCoordinates attributesCoordinates)
private IList<HtmlAttribute> ProcessAttributes()
{
Match match = _attributeRegex.Match(sourceCode, attributesPosition, startTagRemainderLength);
string content = _innerContext.SourceCode;
int currentPosition = _innerContext.Position;
int currentRemainderLength = _innerContext.RemainderLength;
SourceCodeNodeCoordinates currentCoordinates = _innerContext.NodeCoordinates;

Match match = _attributeRegex.Match(content, currentPosition, currentRemainderLength);
var attributes = new List<HtmlAttribute>();
int currentPosition = attributesPosition;
SourceCodeNodeCoordinates currentCoordinates = attributesCoordinates;

while (match.Success)
{
if (match.Index != _innerContext.Position)
{
break;
}

GroupCollection groups = match.Groups;
Group attributeNameGroup = groups["attributeName"];
Group attributeEqualSignGroup = groups["attributeEqualSign"];
Expand Down Expand Up @@ -913,7 +912,7 @@ private IList<HtmlAttribute> ParseAttributes(string sourceCode, int attributesPo
int lineBreakCount;
int charRemainderCount;

SourceCodeNavigator.CalculateLineBreakCount(sourceCode, currentPosition,
SourceCodeNavigator.CalculateLineBreakCount(content, currentPosition,
attributeNamePosition - currentPosition, out lineBreakCount, out charRemainderCount);
attributeNameCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
currentCoordinates, lineBreakCount, charRemainderCount);
Expand All @@ -934,7 +933,7 @@ private IList<HtmlAttribute> ParseAttributes(string sourceCode, int attributesPo
int lineBreakCount;
int charRemainderCount;

SourceCodeNavigator.CalculateLineBreakCount(sourceCode, currentPosition,
SourceCodeNavigator.CalculateLineBreakCount(content, currentPosition,
attributeValuePosition - currentPosition, out lineBreakCount, out charRemainderCount);
attributeValueCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
currentCoordinates, lineBreakCount, charRemainderCount);
Expand All @@ -947,6 +946,7 @@ private IList<HtmlAttribute> ParseAttributes(string sourceCode, int attributesPo
HtmlAttributeType.Unknown, attributeNameCoordinates, attributeValueCoordinates);
attributes.Add(attribute);

_innerContext.IncreasePosition(match.Length);
match = match.NextMatch();
}

Expand Down
25 changes: 25 additions & 0 deletions src/WebMarkupMin.Core/Utilities/SourceCodeNavigator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,31 @@ private static void FindPreviousLineBreak(string sourceCode, int startPosition,
}
}

/// <summary>
/// Scans forward through a range of the source code and locates the first character
/// that is not whitespace
/// </summary>
/// <param name="sourceCode">Source code</param>
/// <param name="startPosition">Index of the first character to examine</param>
/// <param name="length">Number of characters to examine, counted from
/// <paramref name="startPosition"/></param>
/// <returns>Index of the first non-whitespace character inside the examined range;
/// if the range is empty or contains only whitespace, the index immediately after
/// the range (<c>startPosition + length</c>, or <c>startPosition</c> when
/// <paramref name="length"/> is not positive)</returns>
internal static int FindNextNonWhitespaceChar(string sourceCode, int startPosition, int length)
{
    // Inclusive index of the last character that belongs to the examined range.
    int lastIndex = startPosition + length - 1;
    int index = startPosition;

    // Step over whitespace until the range is exhausted or a significant character is hit.
    while (index <= lastIndex && char.IsWhiteSpace(sourceCode[index]))
    {
        index++;
    }

    return index;
}

/// <summary>
/// Calculates a line break count
/// </summary>
Expand Down
3 changes: 2 additions & 1 deletion src/WebMarkupMin.Core/WebMarkupMin.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Minification of markup produces by removing extra whitespaces, comments and redu

Also supports minification of views of popular JavaScript template engines: KnockoutJS, Kendo UI MVVM and AngularJS 1.X.</Description>
<PackageTags>WebMarkupMin;Markup;HTML;XHTML;XML;Minification;Minifier;Minify;Performance;Optimization;Compression</PackageTags>
<PackageReleaseNotes>Fixed a error #77 “HtmlMinifier.Minify throws InvalidOperationException”.</PackageReleaseNotes>
<PackageReleaseNotes>1. Fixed an error #73 “HtmlMinifier.Minify hangs permanently”;
2. Fixed an error #77 “HtmlMinifier.Minify throws InvalidOperationException”.</PackageReleaseNotes>
</PropertyGroup>

<Import Project="../../build/common.props" />
Expand Down
3 changes: 2 additions & 1 deletion src/WebMarkupMin.Core/readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
=============
RELEASE NOTES
=============
Fixed a error #77 “HtmlMinifier.Minify throws InvalidOperationException”.
1. Fixed an error #73 “HtmlMinifier.Minify hangs permanently”;
2. Fixed an error #77 “HtmlMinifier.Minify throws InvalidOperationException”.

=============
DOCUMENTATION
Expand Down
Binary file not shown.
25 changes: 21 additions & 4 deletions test/WebMarkupMin.Core.Test/Html/ParsingTests.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
using System.Collections.Generic;
using System.IO;

using Xunit;

namespace WebMarkupMin.Core.Test.Html
{
public class ParsingTests
public class ParsingTests : FileSystemTestsBase
{
private readonly string _htmlFilesDirectoryPath;


public ParsingTests()
{
_htmlFilesDirectoryPath = Path.GetFullPath(Path.Combine(_baseDirectoryPath, @"../SharedFiles/html/"));
}


[Fact]
public void ParsingNonTrivialMarkupIsCorrect()
{
Expand Down Expand Up @@ -259,6 +269,8 @@ public void ProcessingInvalidCharactersInStartTagIsCorrect()
"</table."
;
const string input11 = "<link id=\"favicon\" rel=?\"shortcut icon\" type=?\"image/?png\" href=?\"#\">";
string input12 = File.ReadAllText(
Path.Combine(_htmlFilesDirectoryPath, "html-document-with-invalid-characters.html"));

// Act
IList<MinificationErrorInfo> errors1 = minifier.Minify(input1).Errors;
Expand All @@ -272,6 +284,7 @@ public void ProcessingInvalidCharactersInStartTagIsCorrect()
IList<MinificationErrorInfo> errors9 = minifier.Minify(input9).Errors;
IList<MinificationErrorInfo> errors10 = minifier.Minify(input10).Errors;
IList<MinificationErrorInfo> errors11 = minifier.Minify(input11).Errors;
IList<MinificationErrorInfo> errors12 = minifier.Minify(input12).Errors;

// Assert
Assert.Equal(1, errors1.Count);
Expand All @@ -284,7 +297,7 @@ public void ProcessingInvalidCharactersInStartTagIsCorrect()

Assert.Equal(1, errors3.Count);
Assert.Equal(1, errors3[0].LineNumber);
Assert.Equal(205, errors3[0].ColumnNumber);
Assert.Equal(120, errors3[0].ColumnNumber);

Assert.Equal(1, errors4.Count);
Assert.Equal(1, errors4[0].LineNumber);
Expand All @@ -304,7 +317,7 @@ public void ProcessingInvalidCharactersInStartTagIsCorrect()

Assert.Equal(1, errors8.Count);
Assert.Equal(2, errors8[0].LineNumber);
Assert.Equal(80, errors8[0].ColumnNumber);
Assert.Equal(46, errors8[0].ColumnNumber);

Assert.Equal(1, errors9.Count);
Assert.Equal(1, errors9[0].LineNumber);
Expand All @@ -316,7 +329,11 @@ public void ProcessingInvalidCharactersInStartTagIsCorrect()

Assert.Equal(1, errors11.Count);
Assert.Equal(1, errors11[0].LineNumber);
Assert.Equal(68, errors11[0].ColumnNumber);
Assert.Equal(25, errors11[0].ColumnNumber);

Assert.Equal(1, errors12.Count);
Assert.Equal(1, errors12[0].LineNumber);
Assert.Equal(3, errors12[0].ColumnNumber);
}

[Fact]
Expand Down

0 comments on commit 41a8460

Please sign in to comment.