Skip to content

Commit

Permalink
WIP, adding RegexOptions.AnyNewLine
Browse files Browse the repository at this point in the history
  • Loading branch information
Mpdreamz committed Jun 2, 2018
1 parent c86814f commit 4c4d688
Show file tree
Hide file tree
Showing 14 changed files with 78 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
# Visual Studio 15
VisualStudioVersion = 15.0.27703.2026
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "System.Text.RegularExpressions.Tests", "tests\System.Text.RegularExpressions.Tests.csproj", "{94B106C2-D574-4392-80AB-3EE308A078DF}"
ProjectSection(ProjectDependencies) = postProject
{2C58640B-5BED-4E83-9554-CD2B9762643F} = {2C58640B-5BED-4E83-9554-CD2B9762643F}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "System.Text.RegularExpressions.Performance.Tests", "tests\Performance\System.Text.RegularExpressions.Performance.Tests.csproj", "{7f4b8c48-8692-4885-bf84-feb7ea82e34b}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "System.Text.RegularExpressions.Performance.Tests", "tests\Performance\System.Text.RegularExpressions.Performance.Tests.csproj", "{7F4B8C48-8692-4885-BF84-FEB7EA82E34B}"
ProjectSection(ProjectDependencies) = postProject
{2C58640B-5BED-4E83-9554-CD2B9762643F} = {2C58640B-5BED-4E83-9554-CD2B9762643F}
EndProjectSection
Expand Down Expand Up @@ -35,10 +35,10 @@ Global
{94B106C2-D574-4392-80AB-3EE308A078DF}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU
{94B106C2-D574-4392-80AB-3EE308A078DF}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU
{94B106C2-D574-4392-80AB-3EE308A078DF}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU
{7f4b8c48-8692-4885-bf84-feb7ea82e34b}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU
{7f4b8c48-8692-4885-bf84-feb7ea82e34b}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU
{7f4b8c48-8692-4885-bf84-feb7ea82e34b}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU
{7f4b8c48-8692-4885-bf84-feb7ea82e34b}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU
{7F4B8C48-8692-4885-BF84-FEB7EA82E34B}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU
{7F4B8C48-8692-4885-BF84-FEB7EA82E34B}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU
{7F4B8C48-8692-4885-BF84-FEB7EA82E34B}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU
{7F4B8C48-8692-4885-BF84-FEB7EA82E34B}.Release|Any CPU.Build.0 = netcoreapp-Release|Any CPU
{2C58640B-5BED-4E83-9554-CD2B9762643F}.Debug|Any CPU.ActiveCfg = netcoreapp-Debug|Any CPU
{2C58640B-5BED-4E83-9554-CD2B9762643F}.Debug|Any CPU.Build.0 = netcoreapp-Debug|Any CPU
{2C58640B-5BED-4E83-9554-CD2B9762643F}.Release|Any CPU.ActiveCfg = netcoreapp-Release|Any CPU
Expand All @@ -53,8 +53,11 @@ Global
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{94B106C2-D574-4392-80AB-3EE308A078DF} = {1A2F9F4A-A032-433E-B914-ADD5992BB178}
{7f4b8c48-8692-4885-bf84-feb7ea82e34b} = {1A2F9F4A-A032-433E-B914-ADD5992BB178}
{7F4B8C48-8692-4885-BF84-FEB7EA82E34B} = {1A2F9F4A-A032-433E-B914-ADD5992BB178}
{2C58640B-5BED-4E83-9554-CD2B9762643F} = {E107E9C1-E893-4E87-987E-04EF0DCEAEFD}
{B262B15E-13E6-4C1E-A25E-16D06E222A09} = {2E666815-2EDB-464B-9DF6-380BF4789AD4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {B70BAD11-A1AC-49B3-AD52-A5E52D06922A}
EndGlobalSection
EndGlobal
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ void System.Runtime.Serialization.ISerializable.GetObjectData(System.Runtime.Ser
public enum RegexOptions
{
Compiled = 8,
AnyNewLine = 1024,
CultureInvariant = 512,
ECMAScript = 256,
ExplicitCapture = 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace System.Text.RegularExpressions
/// </summary>
public partial class Regex : ISerializable
{
internal const int MaxOptionShift = 10;
internal const int MaxOptionShift = 11;

protected internal string pattern; // The string pattern provided
protected internal RegexOptions roptions; // the top-level options from the options string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ internal sealed class RegexCode
public const int Back = 128; // bit to indicate that we're backtracking.
public const int Back2 = 256; // bit to indicate that we're backtracking on a second branch.
public const int Ci = 512; // bit to indicate that we're case-insensitive.
public const int AnyNewLine = 1024; // accept \n OR \r\n as newline $

public readonly int[] Codes; // the code
public readonly string[] Strings; // the string/set table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ internal ref struct RegexFCD
public const int End = 0x0020;
public const int Boundary = 0x0040;
public const int ECMABoundary = 0x0080;
public const int AnyNewLine = 0x00100;

private readonly List<RegexFC> _fcStack;
private ValueListBuilder<int> _intStack; // must not be readonly
Expand Down Expand Up @@ -125,6 +126,7 @@ public static RegexPrefix Prefix(RegexTree tree)

case RegexNode.Bol:
case RegexNode.Eol:
case RegexNode.AnyNewLine:
case RegexNode.Boundary:
case RegexNode.ECMABoundary:
case RegexNode.Beginning:
Expand Down Expand Up @@ -180,6 +182,7 @@ public static int Anchors(RegexTree tree)

case RegexNode.Bol:
case RegexNode.Eol:
case RegexNode.AnyNewLine:
case RegexNode.Boundary:
case RegexNode.ECMABoundary:
case RegexNode.Beginning:
Expand Down Expand Up @@ -213,6 +216,7 @@ private static int AnchorFromType(int type)
{
case RegexNode.Bol: return Bol;
case RegexNode.Eol: return Eol;
case RegexNode.AnyNewLine: return AnyNewLine;
case RegexNode.Boundary: return Boundary;
case RegexNode.ECMABoundary: return ECMABoundary;
case RegexNode.Beginning: return Beginning;
Expand Down Expand Up @@ -512,6 +516,7 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
case RegexNode.Start:
case RegexNode.EndZ:
case RegexNode.End:
case RegexNode.AnyNewLine:
PushFC(new RegexFC(true));
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,16 @@ protected override void Go()
advance = 0;
continue;

case RegexCode.AnyNewLine:
{
    // Zero-width end-of-line anchor that accepts either "\n" or "\r\n".
    // It succeeds at the very end of the input, immediately before "\r\n",
    // or immediately before a '\n' that is not the second half of "\r\n".
    // Braces give the locals their own scope inside the switch.
    int remaining = Rightchars();
    if (remaining > 0)
    {
        int pos = Textpos();
        char next = CharAt(pos);

        if (next == '\r')
        {
            // A lone '\r' is not a line ending; it must be followed by '\n'.
            if (remaining < 2 || CharAt(pos + 1) != '\n')
                break;
        }
        else if (next == '\n')
        {
            // Never match between '\r' and '\n': that would let a preceding
            // ".*" swallow the '\r' and defeat the purpose of the option.
            if (pos > runtextbeg && CharAt(pos - 1) == '\r')
                break;
        }
        else
        {
            // Any other character means we are in the middle of a line.
            break;
        }
    }

    // The anchor consumes no input; use advance = 0 like the other
    // zero-width anchor cases in this switch.
    advance = 0;
    continue;
}

case RegexCode.Boundary:
if (!IsBoundary(Textpos(), runtextbeg, runtextend))
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ internal sealed class RegexNode
public const int Start = RegexCode.Start; // \G
public const int EndZ = RegexCode.EndZ; // \Z
public const int End = RegexCode.End; // \z
public const int AnyNewLine = RegexCode.AnyNewLine; // accept \n OR \r\n as newline $

// Interior nodes do not correspond to primitive operations, but
// control structures compositing other operations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ public enum RegexOptions

ECMAScript = 0x0100, // "e"
CultureInvariant = 0x0200,
AnyNewLine = 0x0400,
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ private void SetPattern(string Re)
{
if (Re == null)
Re = string.Empty;



_pattern = Re;
_currentPos = 0;
}
Expand Down Expand Up @@ -335,7 +338,12 @@ private RegexNode ScanRegex()
break;

case '$':
AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ);
if (UseOptionAnyNewLine())
{
AddUnitType(RegexNode.AnyNewLine);
}
else
AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ);
break;

case '.':
Expand Down Expand Up @@ -1946,6 +1954,15 @@ private bool UseOptionM()
return (_options & RegexOptions.Multiline) != 0;
}

/*
 * Reports whether the AnyNewLine bit is present in the current options.
 */
private bool UseOptionAnyNewLine()
{
    RegexOptions anyNewLineBit = _options & RegexOptions.AnyNewLine;
    return anyNewLineBit != 0;
}


/*
* True if S option altering meaning of . is on.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex)
case RegexNode.Start:
case RegexNode.EndZ:
case RegexNode.End:
case RegexNode.AnyNewLine:
Emit(node.NType);
break;

Expand Down
4 changes: 2 additions & 2 deletions src/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ public static void Ctor_Invalid()
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)(-1)));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)(-1), new TimeSpan()));

AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)0x400));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)0x400, new TimeSpan()));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)0x800));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", (RegexOptions)0x800, new TimeSpan()));

AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.RightToLeft));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new Regex("foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture));
Expand Down
21 changes: 21 additions & 0 deletions src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,17 @@ public static IEnumerable<object[]> Match_Advanced_TestData()
}
};

// Multiline with AnyNewLine
yield return new object[]
{
"(line3\n$\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 24,
new CaptureData[]
{
new CaptureData("line3\n\nline4", 12, 12),
new CaptureData("line3\n\n", 12, 7)
}
};

// RightToLeft
yield return new object[]
{
Expand Down Expand Up @@ -825,5 +836,15 @@ public void IsMatch_Invalid()
Assert.Throws<ArgumentOutOfRangeException>(() => new Regex("pattern").IsMatch("input", -1));
Assert.Throws<ArgumentOutOfRangeException>(() => new Regex("pattern").IsMatch("input", 6));
}

/// <summary>
/// Checks how <c>$</c> treats a "\r\n" line ending with and without
/// <see cref="RegexOptions.AnyNewLine"/>.
/// </summary>
[Fact]
public void MatchAnyNewLine()
{
    const string input = "foo\r\nbar";
    const string pattern = ".*$";

    // Baseline: with Multiline alone, '$' only recognises '\n', so '.' consumes
    // the '\r' and the first match is "foo\r" (4 characters).
    Match plainMatch = Regex.Match(input, pattern, RegexOptions.Multiline);
    Assert.Equal(4, plainMatch.Length);

    // With AnyNewLine the whole "\r\n" pair is expected to act as the line
    // ending, so the first match is just "foo" (3 characters).
    Match anyMatch = Regex.Match(input, pattern, RegexOptions.Multiline | RegexOptions.AnyNewLine);
    Assert.Equal(3, anyMatch.Length);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ public void Matches_Invalid()
// Options are invalid
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)(-1)));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)(-1), TimeSpan.FromSeconds(1)));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x400));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x400, TimeSpan.FromSeconds(1)));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x800));
AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x800, TimeSpan.FromSeconds(1)));

// MatchTimeout is invalid
AssertExtensions.Throws<ArgumentOutOfRangeException>("matchTimeout", () => Regex.Matches("input", "pattern", RegexOptions.None, TimeSpan.Zero));
Expand Down
3 changes: 3 additions & 0 deletions src/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ public static IEnumerable<object[]> Split_NonCompiled_TestData()
yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 10, 20, new string[] { "1a", "b", "c", "d", "e", "f", "g", "h", "i", "k" } };
yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 2, 20, new string[] { "1a2b3c4d5e6f7g8h9i", "k" } };
yield return new object[] { @"\d", "1a2b3c4d5e6f7g8h9i0k", RegexOptions.RightToLeft, 1, 20, new string[] { "1a2b3c4d5e6f7g8h9i0k" } };

// AnyNewLine
yield return new object[] { @"$", "aaaa\r\nbbbbb", RegexOptions.Multiline | RegexOptions.AnyNewLine, 7, 0, new string[] { "aaaa", "bbbb" } };
}

[Theory]
Expand Down

0 comments on commit 4c4d688

Please sign in to comment.