Skip to content

Commit

Permalink
Merge pull request #49 from CarlVerret/dlemire/fast_utf8
Browse files Browse the repository at this point in the history
UTF8/ASCII-based parser
  • Loading branch information
lemire authored Feb 26, 2021
2 parents fe3a8d9 + c108400 commit 73c31a9
Show file tree
Hide file tree
Showing 9 changed files with 735 additions and 24 deletions.
29 changes: 29 additions & 0 deletions Benchmark/FastParserBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ namespace csFastFloat.Benchmark
public class FFBencmark
{
private string[] _lines;
private byte[][] _linesUtf8;

private class Config : ManualConfig
{
Expand All @@ -34,6 +35,33 @@ public Config()
}
}

/// <summary>
/// Baseline benchmark: parses every UTF-8 encoded line with the framework's
/// <see cref="System.Buffers.Text.Utf8Parser"/> and returns the largest value seen.
/// </summary>
/// <returns>The maximum parsed value, or <see cref="double.MinValue"/> when there are no lines.</returns>
/// <exception cref="InvalidOperationException">
/// A line could not be parsed, or was not consumed in its entirety.
/// </exception>
[Benchmark(Description = "Utf8Parser")]
public double Utf8Parser()
{
    double largest = double.MinValue;

    foreach (byte[] line in _linesUtf8)
    {
        bool parsed = System.Buffers.Text.Utf8Parser.TryParse(line, out double value, out int bytesConsumed);

        // A partial parse is treated as a failure: the whole line must be a number.
        if (!parsed || bytesConsumed != line.Length)
        {
            throw new InvalidOperationException();
        }

        // Folding into a maximum keeps the parsed value observable by the caller.
        if (value > largest)
        {
            largest = value;
        }
    }

    return largest;
}

/// <summary>
/// Benchmarks <c>FastDoubleParser.ParseDouble</c> on the UTF-8 encoded lines
/// and returns the largest value seen.
/// </summary>
/// <returns>The maximum parsed value, or <see cref="double.MinValue"/> when there are no lines.</returns>
[Benchmark(Description = "FastFloat.ParseDouble() - UTF8")]
public double FastParserUtf8_()
{
    double largest = double.MinValue;

    foreach (byte[] line in _linesUtf8)
    {
        double value = FastDoubleParser.ParseDouble(line);

        // Folding into a maximum keeps the parsed value observable by the caller.
        if (value > largest)
        {
            largest = value;
        }
    }

    return largest;
}

[Benchmark(Description = "FastFloat.ParseDouble()")]
public double FastParser_()
Expand Down Expand Up @@ -98,6 +126,7 @@ public void Setup()
{
Console.WriteLine("reading data");
_lines = System.IO.File.ReadAllLines(FileName);
_linesUtf8 = Array.ConvertAll(_lines, System.Text.Encoding.UTF8.GetBytes);
}
}

Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ Job=.NET Core 5.0 Runtime=.NET Core 5.0

# Usage

Two functions are available. ParseDouble and ParseFloat.
Two functions are available: `FastDoubleParser.ParseDouble` and `FastFloatParser.ParseFloat`.

String, char * and ReadOnlySpan<char> are supported inputs.
`String` and `ReadOnlySpan<char>` are supported inputs.

```C#
using csFastFloat;
Expand All @@ -100,6 +100,10 @@ foreach (string l in lines)
}
```

Input strings are expected to be valid UTF-16.

For UTF-8 or ASCII inputs, you may pass a `ReadOnlySpan<byte>` argument.

# Testing

The unit tests in the /TestcsFastFloat project combine test cases from many libraries, including tests used by the Go team.
Expand Down
10 changes: 7 additions & 3 deletions TestcsFastFloat/Basic/BasicTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,12 @@ private void TestInfinity_Double(string sut, double expected_value)
[InlineData("1.7976931348623158e308", 1.7976931348623157e+308)] // 0x1.fffffffffffffp + 1023)]
[InlineData("9007199254740993.0", 9007199254740992.0)] // 0x1p53)]
[Theory]
private void TestGeneral_Double(string sut, double expected_value) => Assert.Equal(expected_value, FastDoubleParser.ParseDouble(sut));

// Checks that the same literal parses to the expected double through each
// supported input representation: string, UTF-16 span and UTF-8 bytes.
private void TestGeneral_Double(string sut, double expected_value)
{
    double fromString = FastDoubleParser.ParseDouble(sut);
    Assert.Equal(expected_value, fromString);

    double fromUtf16Span = FastDoubleParser.ParseDouble(sut.AsSpan());
    Assert.Equal(expected_value, fromUtf16Span);

    byte[] utf8 = System.Text.Encoding.UTF8.GetBytes(sut);
    double fromUtf8 = FastDoubleParser.ParseDouble(utf8);
    Assert.Equal(expected_value, fromUtf8);
}
[Trait("Category", "Smoke Test")]
[InlineData("1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", 655, "", 1.17549419)]
[InlineData("1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", 656, "", 1.17549419)]
Expand All @@ -240,7 +244,6 @@ private void TestInfinity_Double(string sut, double expected_value)
[InlineData("1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", 1000, "e-38", 1.1754941E-38)]
[Theory]
// Pads the literal on the right with '0' up to a total width of `zeros`,
// appends the exponent suffix, and checks the parsed float.
private void TestGeneral_Float_appendZeros(string sut, int zeros, string exp, float expected_value)
{
    string input = sut.PadRight(zeros, '0') + exp;
    float parsed = FastFloatParser.ParseFloat(input);
    Assert.Equal(expected_value, parsed);
}

// //verify32(1.00000006e+09f)]
////verify32(1.4012984643e-45f)]
////verify32(1.1754942107e-38f)]
Expand Down Expand Up @@ -311,6 +314,7 @@ private void TestGeneral_Float(string sut, float expected_value)
{
Assert.Equal(expected_value, FastFloatParser.ParseFloat(sut));
Assert.Equal(expected_value, FastFloatParser.ParseFloat(sut.AsSpan()));
Assert.Equal(expected_value, FastFloatParser.ParseFloat(System.Text.Encoding.UTF8.GetBytes(sut)));
}
}
}
12 changes: 12 additions & 0 deletions TestcsFastFloat/suppl_tests/SupplFilesTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ private static void VerifyFile(string fileName)
Assert.True(_f == f);
double d = FastDoubleParser.ParseDouble(sut[3]);
Assert.True(_d == d);

// parse and assert equality
float f_span = FastFloatParser.ParseFloat(sut[3].AsSpan());
Assert.True(_f == f_span);
double d_span = FastDoubleParser.ParseDouble(sut[3].AsSpan());
Assert.True(_d == d_span);

// parse and assert equality
float f_utf8 = FastFloatParser.ParseFloat(System.Text.Encoding.UTF8.GetBytes(sut[3]));
Assert.True(_f == f_utf8);
double d_utf8 = FastDoubleParser.ParseDouble(System.Text.Encoding.UTF8.GetBytes(sut[3]));
Assert.True(_d == d_utf8);
}
catch (Exception ex)
{
Expand Down
98 changes: 98 additions & 0 deletions csFastFloat/FastDoubleParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,49 @@ unsafe static internal double ParseNumber(char* first, char* last, chars_format
return ToFloat(pns.negative, am);
}

/// <summary>
/// Parses a double from the byte range [<paramref name="first"/>, <paramref name="last"/>)
/// (UTF-8/ASCII input).
/// </summary>
/// <param name="first">Pointer to the first byte of the input.</param>
/// <param name="last">Pointer one past the final byte of the input.</param>
/// <param name="expectedFormat">Format passed through to the number-string parser.</param>
/// <param name="decimal_separator">Decimal separator byte, used by the long-mantissa fallback.</param>
/// <returns>The parsed value.</returns>
internal static unsafe double ParseNumber(byte* first, byte* last, chars_format expectedFormat = chars_format.is_general, byte decimal_separator = (byte)'.')
{
    // Advance past leading whitespace.
    for (; first != last && Utils.is_space(*first); first++)
    {
    }

    // Nothing left to parse: report the error through the shared helper.
    if (first == last)
    {
        ThrowArgumentException();
    }

    // NOTE(review): decimal_separator is not forwarded here, only to ParseLongMantissa
    // below — confirm that is intended.
    ParsedNumberString pns = ParsedNumberString.ParseNumberString(first, last, expectedFormat);
    if (!pns.valid)
    {
        // Not a plain number; let the fallback decide about inf/nan spellings.
        return HandleInvalidInput(first, last);
    }

    // Clinger's fast path: exponent and mantissa are both within the fast-path
    // limits and no digits were dropped.
    bool canUseFastPath =
        DoubleBinaryConstants.min_exponent_fast_path <= pns.exponent
        && pns.exponent <= DoubleBinaryConstants.max_exponent_fast_path
        && pns.mantissa <= DoubleBinaryConstants.max_mantissa_fast_path
        && !pns.too_many_digits;
    if (canUseFastPath)
    {
        return FastPath(pns);
    }

    AdjustedMantissa am = ComputeFloat(pns.exponent, pns.mantissa);

    // When too_many_digits is set, the result is only trusted if mantissa and
    // mantissa + 1 produce the same answer; otherwise it is marked invalid.
    if (pns.too_many_digits && am != ComputeFloat(pns.exponent, pns.mantissa + 1))
    {
        am.power2 = -1; // value is invalid.
    }

    // A negative power2 marks an invalid result: go the long way around
    // and re-parse the full input. This is very uncommon.
    if (am.power2 < 0)
    {
        am = ParseLongMantissa(first, last, decimal_separator);
    }

    return ToFloat(pns.negative, am);
}

/// <summary>
/// Parses a double from a span of UTF-8/ASCII bytes.
/// </summary>
/// <param name="s">The bytes to parse.</param>
/// <param name="expectedFormat">Format forwarded to the underlying parser.</param>
/// <param name="decimal_separator">Decimal separator byte forwarded to the underlying parser.</param>
/// <returns>The parsed value.</returns>
public static unsafe double ParseDouble(ReadOnlySpan<byte> s, chars_format expectedFormat = chars_format.is_general, byte decimal_separator = (byte)'.')
{
    // Pin the span so the pointer-based parser can walk it safely.
    fixed (byte* begin = s)
    {
        byte* end = begin + s.Length;
        return ParseNumber(begin, end, expectedFormat, decimal_separator);
    }
}

/// <summary>
///
Expand Down Expand Up @@ -363,12 +405,19 @@ internal static AdjustedMantissa ComputeFloat(DecimalInfo d)
return answer;
}

/// <summary>
/// Slow path for UTF-16 inputs: decodes the character range
/// [<paramref name="first"/>, <paramref name="last"/>) into a <c>DecimalInfo</c>
/// and computes the adjusted mantissa from it.
/// </summary>
internal static unsafe AdjustedMantissa ParseLongMantissa(char* first, char* last, char decimal_separator)
    => ComputeFloat(DecimalInfo.parse_decimal(first, last, decimal_separator));

/// <summary>
/// Slow path for UTF-8/ASCII inputs: decodes the byte range
/// [<paramref name="first"/>, <paramref name="last"/>) into a <c>DecimalInfo</c>
/// and computes the adjusted mantissa from it.
/// </summary>
internal static unsafe AdjustedMantissa ParseLongMantissa(byte* first, byte* last, byte decimal_separator)
    => ComputeFloat(DecimalInfo.parse_decimal(first, last, decimal_separator));


unsafe static internal double HandleInvalidInput(char* first, char* last)
Expand Down Expand Up @@ -404,6 +453,55 @@ unsafe static internal double HandleInvalidInput(char* first, char* last)
}


/// <summary>
/// Fallback for UTF-8/ASCII input that was rejected as a regular number:
/// recognises the spellings "nan", "inf" and "infinity", each optionally
/// preceded by '+' or '-', compared through <c>Utils.strncasecmp</c>.
/// </summary>
/// <param name="first">Pointer to the first byte of the input.</param>
/// <param name="last">Pointer one past the final byte of the input.</param>
/// <returns>
/// NaN for the nan spellings; positive or negative infinity for the inf/infinity
/// spellings (negative only when the first byte is '-').
/// </returns>
/// <remarks>
/// Any other input is reported through <c>ThrowArgumentException()</c>.
/// </remarks>
unsafe static internal double HandleInvalidInput(byte* first, byte* last)
{
    // The ASCII patterns are spelled with character casts rather than raw
    // numeric codes so that a mistyped code cannot silently change a pattern
    // (the "-inf" pattern previously started with 5 instead of 45, '-').
    ReadOnlySpan<byte> infinity_string = new byte[] { (byte)'i', (byte)'n', (byte)'f', (byte)'i', (byte)'n', (byte)'i', (byte)'t', (byte)'y' };
    ReadOnlySpan<byte> inf_string = new byte[] { (byte)'i', (byte)'n', (byte)'f' };
    ReadOnlySpan<byte> pinf_string = new byte[] { (byte)'+', (byte)'i', (byte)'n', (byte)'f' };
    ReadOnlySpan<byte> minf_string = new byte[] { (byte)'-', (byte)'i', (byte)'n', (byte)'f' };
    ReadOnlySpan<byte> nan_string = new byte[] { (byte)'n', (byte)'a', (byte)'n' };
    ReadOnlySpan<byte> mnan_string = new byte[] { (byte)'-', (byte)'n', (byte)'a', (byte)'n' };
    ReadOnlySpan<byte> pnan_string = new byte[] { (byte)'+', (byte)'n', (byte)'a', (byte)'n' };

    long length = last - first;

    if (length >= 3)
    {
        if (Utils.strncasecmp(first, nan_string, 3))
        {
            return DoubleBinaryConstants.NaN;
        }

        // "inf" and "infinity" both map to positive infinity, so the
        // three-byte prefix check is sufficient here.
        if (Utils.strncasecmp(first, inf_string, 3))
        {
            return DoubleBinaryConstants.PositiveInfinity;
        }

        if (length >= 4)
        {
            if (Utils.strncasecmp(first, pnan_string, 4) || Utils.strncasecmp(first, mnan_string, 4))
            {
                return DoubleBinaryConstants.NaN;
            }

            // The last comparison reads first[1..8], so at least 9 bytes must be
            // available (the previous ">= 8" bound allowed a one-byte over-read).
            // NOTE(review): that comparison does not verify first[0] is a sign
            // character — confirm whether accepting any leading byte is intended.
            if (Utils.strncasecmp(first, pinf_string, 4) ||
                Utils.strncasecmp(first, minf_string, 4) ||
                ((length >= 9) && Utils.strncasecmp(first + 1, infinity_string, 8)))
            {
                return (first[0] == (byte)'-') ? DoubleBinaryConstants.NegativeInfinity : DoubleBinaryConstants.PositiveInfinity;
            }
        }
    }

    ThrowArgumentException();
    return 0d;
}




Expand Down
Loading

0 comments on commit 73c31a9

Please sign in to comment.