diff --git a/Examples/.vscode/launch.json b/Examples/.vscode/launch.json index 1c0dd85..370fd86 100644 --- a/Examples/.vscode/launch.json +++ b/Examples/.vscode/launch.json @@ -11,7 +11,7 @@ "preLaunchTask": "build", // If you have changed target frameworks, make sure to update the program path. "program": "${workspaceFolder}/bin/Debug/netcoreapp2.0/Examples.dll", - "args": ["FitToDistribution"], + "args": [], "cwd": "${workspaceFolder}", // For more information about the 'console' field, see https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md#console-terminal-window "console": "internalConsole", diff --git a/Examples/Data.cs b/Examples/Data.cs index 41b608c..91bfb7d 100644 --- a/Examples/Data.cs +++ b/Examples/Data.cs @@ -9,10 +9,79 @@ using Meta.Numerics.Statistics; using Meta.Numerics.Statistics.Distributions; +using Newtonsoft.Json; + namespace Examples { public static class Data { + public static void ConstructTestCsv () { + + using (TextWriter writer = new StreamWriter(File.Create("test.csv"))) { /* File.Create truncates an existing file; File.OpenWrite would leave stale trailing bytes from a longer previous version. */ + writer.WriteLine("Id, Name, Sex, Birthdate, Height, Weight, Result"); + writer.WriteLine("1, John, M, 1970-01-02, 190.0, 75.0, True"); + writer.WriteLine("2, Mary, F, 1980-02-03, 155.0, 40.0, True"); + writer.WriteLine("3, Luke, M, 1990-03-04, 180.0, 60.0, False"); + } + + } + + [ExampleMethod] + public static void ImportingData () { + + FrameTable data; + using (TextReader reader = File.OpenText("test.csv")) { + data = FrameTable.FromCsv(reader); + } + + Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows."); + Console.WriteLine("The names and types of the columns are:"); + foreach (FrameColumn column in data.Columns) { + Console.WriteLine($" {column.Name} of type {column.StorageType}"); + } + + FrameTable titanic; + Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"); + WebRequest request = WebRequest.Create(url); + using (WebResponse response = request.GetResponse()) { + 
using (StreamReader reader = new StreamReader(response.GetResponseStream())) { + titanic = FrameTable.FromCsv(reader); + } + } + + Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json"); + WebClient client = new WebClient(); + string input = client.DownloadString(jsonUrl); + List> output = JsonConvert.DeserializeObject>>(input); + FrameTable jsonExample = FrameTable.FromDictionaries(output); + + // Define the schema. + FrameTable table = new FrameTable(); + table.AddColumn("Id"); + table.AddColumn("Name"); + table.AddColumn("Sex"); + table.AddColumn("Birthdate"); + table.AddColumn("Height"); + table.AddColumn("Weight"); + table.AddColumn("Result"); + + // Add rows as arrays of objects. + table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true); + table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true); + + // Add a row using a dictionary. This is more verbose, but very clear. + table.AddRow(new Dictionary(){ + {"Id", 3}, + {"Name", null}, + {"Sex", "M"}, + {"Birthdate", DateTime.Parse("1990-03-04")}, + {"Height", 180.0}, + {"Weight", 60.0}, + {"Result", false} + }); + + } + [ExampleMethod] public static void ManipulatingData () { @@ -82,8 +151,18 @@ public static void AnalyzingData () { table = FrameTable.FromCsv(reader); } } + FrameView view = table.WhereNotNull(); + + // Get the column with (zero-based) index 4. + FrameColumn column4 = view.Columns[4]; + // Get the column named "Height". + FrameColumn heightsColumn = view.Columns["Height"]; + // Even easier way to get the column named "Height". 
+ FrameColumn alsoHeightsColumn = view["Height"]; - SummaryStatistics summary = new SummaryStatistics(table["Height"].As()); + IReadOnlyList heights = view["Height"].As(); + + SummaryStatistics summary = new SummaryStatistics(view["Height"].As()); Console.WriteLine($"Count = {summary.Count}"); Console.WriteLine($"Mean = {summary.Mean}"); Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}"); @@ -92,74 +171,77 @@ public static void AnalyzingData () { Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}"); IReadOnlyList maleHeights = - table.Where("Sex", s => s == "M").Columns["Height"].As(); + view.Where("Sex", s => s == "M").Columns["Height"].As(); IReadOnlyList femaleHeights = - table.Where("Sex", s => s == "F").Columns["Height"].As(); + view.Where("Sex", s => s == "F").Columns["Height"].As(); TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights); - Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}, P = {test.Probability}"); + Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}"); + Console.WriteLine($"P = {test.Probability}"); TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest(); - TestResult totalHeightNormality = table["Height"].As().ShapiroFranciaTest(); + TestResult totalHeightNormality = view["Height"].As().ShapiroFranciaTest(); TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights); LinearRegressionResult fit = - table["Weight"].As().LinearRegression(table["Height"].As()); + view["Weight"].As().LinearRegression(view["Height"].As()); Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept})."); Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation."); ContingencyTable contingency = - Bivariate.Crosstabs(table["Sex"].As(), table["Result"].As()); + Bivariate.Crosstabs(view["Sex"].As(), view["Result"].As()); Console.WriteLine($"Male incidence: 
{contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}"); - Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(false, "F")}"); + Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}"); Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}"); - table.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0)); - table.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24); + view.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0)); + view.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24); MultiLinearLogisticRegressionResult result = - table["Result"].As().MultiLinearLogisticRegression( - table["Bmi"].As(), - table["Sex"].As(s => s == "M" ? 1.0 : 0.0) + view["Result"].As().MultiLinearLogisticRegression( + view["Bmi"].As(), + view["Sex"].As(s => s == "M" ? 
1.0 : 0.0) ); foreach (Parameter parameter in result.Parameters) { Console.WriteLine($"{parameter.Name} = {parameter.Estimate}"); } - //TestResult ageResultPearson = Bivariate.PearsonRTest(table["Age"].As(), table["Result"].As()); - TestResult spearman = Bivariate.SpearmanRhoTest(table["Age"].As(), table["Result"].As()); + TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As(), view["Result"].As()); Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}"); } - public static void ConstructData () { + public static void ConstructExampleData () { FrameTable table = new FrameTable(); table.AddColumn("Id"); table.AddColumn("Name"); table.AddColumn("Sex"); table.AddColumn("Birthdate"); - table.AddColumns("Height", "Weight"); + table.AddColumn("Height"); + table.AddColumns("Weight"); table.AddColumn("Result"); - //Random rng = new Random(3); - //Random rng = new Random(314159); - // Random rng = new Random(271828); Random rng = new Random(1000001); - //string[] maleNames = new string[1024]; string[] maleNames = new string[] {"Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" }; AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng); - //string[] femaleNames = new string[1024]; string[] femaleNames = new string[] {"Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" }; AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng); - string path = @"C:\Users\dawright\Documents\example.csv"; + // add rows with nulls + table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false); + table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false); + + string path = @"example.csv"; using (StreamWriter writer = new 
StreamWriter(File.OpenWrite(path))) { table.ToCsv(writer); } Console.WriteLine(File.Exists(path)); + string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented); + File.WriteAllText("example.json", json); + } private static void AddRows(FrameTable table, IReadOnlyList names, string sex, double meanHeight, double stddevHeight, double meanBmi, double stddevBmi, int flag, Random rng) { diff --git a/Examples/Examples.csproj b/Examples/Examples.csproj index aa5f809..d2b53c2 100644 --- a/Examples/Examples.csproj +++ b/Examples/Examples.csproj @@ -5,5 +5,6 @@ + \ No newline at end of file diff --git a/Examples/Program.cs b/Examples/Program.cs index 0cb99e2..8fb5514 100644 --- a/Examples/Program.cs +++ b/Examples/Program.cs @@ -23,7 +23,6 @@ private static MethodInfo[] GetExampleMethods () { static void Main(string[] args) { - MethodInfo[] methods = GetExampleMethods(); Dictionary index = new Dictionary(); foreach (MethodInfo method in methods) { diff --git a/Numerics/Analysis/EvaluationSettings.cs b/Numerics/Analysis/EvaluationSettings.cs index 8b22305..57d88c0 100644 --- a/Numerics/Analysis/EvaluationSettings.cs +++ b/Numerics/Analysis/EvaluationSettings.cs @@ -5,12 +5,24 @@ namespace Meta.Numerics.Analysis { /// - /// Contains settings controling the evaluation of a function. + /// Contains settings governing the evaluation of a function. /// + /// + /// Negative values of , + /// , and + /// indicate that the analysis method should use its defaults for + /// that property. Override the default for a property by + /// setting it explicitly. If you set values for some properties + /// but not others, your setting will be applied to the property + /// you set and the others will use defaults. + /// When an analysis method returns an + /// object, its evaluation setting object will contain the specific + /// settings used, so you can see which default values were applied. 
+ /// public class EvaluationSettings { /// - /// Initializes a new set of default evaulation settings. + /// Initializes a new set of default evaluation settings. /// public EvaluationSettings () { evaluationBudget = -1; diff --git a/Numerics/Data/FrameTable.cs b/Numerics/Data/FrameTable.cs index bab8460..13aea75 100644 --- a/Numerics/Data/FrameTable.cs +++ b/Numerics/Data/FrameTable.cs @@ -13,7 +13,7 @@ namespace Meta.Numerics.Data // Additional flags could include: iid, series, circular /// - /// A modify-able array of data. + /// Represents a modify-able array of data. /// /// /// This is the central class for storing data in our data frame system. @@ -21,7 +21,7 @@ namespace Meta.Numerics.Data /// file or the method to create a frame table from JSON or another /// collection-of-dictionaries representation. Or create one programmatically by using the /// constructor to instantiate an empty data frame and - /// and to add columns and rows. + /// and to add columns and rows. /// Using the methods inherited from the class, you can filter, re-order, /// manipulate, and analyze data without incurring the space or time costs of copying the stored data. /// @@ -165,7 +165,7 @@ public void AddColumn(string name, List storage) { } /// - /// Adds the new columns with the given names. + /// Adds new columns with the given names. /// /// The type of the columns. /// The names of the columns. diff --git a/Numerics/Data/FrameView.cs b/Numerics/Data/FrameView.cs index 7d49d07..3cc9d30 100644 --- a/Numerics/Data/FrameView.cs +++ b/Numerics/Data/FrameView.cs @@ -29,7 +29,7 @@ public enum SortOrder { /// - /// A read-only view of an array of data. + /// Represents a read-only view of an array of data. /// /// /// This is the central class for viewing data in our data frame system. @@ -50,10 +50,14 @@ public enum SortOrder { /// accessor to get a column, together with the /// caster to expose it as a collection of the required type. 
/// For example, to obtain a estimate of the mean of the population from the sample in the - /// column named "heights", write view["height"].As<double>().PopulationMean(). + /// column named "height", write view["height"].As<double>().PopulationMean(). + /// Note that, for this to succeed, the underlying storage type of the height column need not be double. As + /// long as the data are convertible to the target type, no problems will arise. For example, + /// the underlying storage type might be int or double?, as long as no null values are present in the view. /// To create the original array of data that will be manipulated, use the /// class. Note that, because the underlying data is not copied when a new view is generated, changes - /// to the original table may not be reflected in the views that have been generated from it. + /// to the original table may have unexpected consequences for the views linked to it. Best practice + /// is not to change the underlying data after generating views based on it. /// You can export a view to CSV or JSON formats using the /// and methods. 
/// diff --git a/Numerics/Properties/AssemblyInfo.cs b/Numerics/Properties/AssemblyInfo.cs index 2b7ec5a..51e14bd 100644 --- a/Numerics/Properties/AssemblyInfo.cs +++ b/Numerics/Properties/AssemblyInfo.cs @@ -46,6 +46,6 @@ // // You can specify all the values or you can default the Revision and Build Numbers // by using the '*' as shown below: -[assembly: AssemblyVersion("4.0.5.0")] -[assembly: AssemblyFileVersion("4.0.5.0")] +[assembly: AssemblyVersion("4.0.7.0")] +[assembly: AssemblyFileVersion("4.0.7.0")] diff --git a/Numerics/Statistics/Distributions/PearsonRDistribution.cs b/Numerics/Statistics/Distributions/PearsonRDistribution.cs index bbfa2c2..7c392bc 100644 --- a/Numerics/Statistics/Distributions/PearsonRDistribution.cs +++ b/Numerics/Statistics/Distributions/PearsonRDistribution.cs @@ -25,7 +25,7 @@ public override double ProbabilityDensity (double x) { if (Math.Abs(x) > 1.0) { return (0.0); } else { - return (Math.Pow(1.0 - x * x, (n - 4) / 2.0) / AdvancedMath.Beta(0.5, (n - 2) / 2.0)); + return (Math.Pow((1.0 - x) * (1.0 + x), (n - 4) / 2.0) / AdvancedMath.Beta(0.5, (n - 2) / 2.0)); } } @@ -89,7 +89,7 @@ public override double LeftProbability (double x) { if (x <= -1.0) { return (0.0); } else if (x < 0.0) { - return (AdvancedMath.Beta((n - 2) / 2.0, 0.5, 1.0 - x * x) / AdvancedMath.Beta((n-2) / 2.0, 0.5) / 2.0); + return (AdvancedMath.Beta((n - 2) / 2.0, 0.5, (1.0 - x) * (1.0 + x)) / AdvancedMath.Beta((n-2) / 2.0, 0.5) / 2.0); } else if (x < 1.0) { return ((1.0 + AdvancedMath.Beta(0.5, (n - 2) / 2.0, x * x) / AdvancedMath.Beta(0.5, (n-2) / 2.0)) / 2.0); } else { diff --git a/Test/BivariateSampleTest.cs b/Test/BivariateSampleTest.cs index f498869..ec9b3e9 100644 --- a/Test/BivariateSampleTest.cs +++ b/Test/BivariateSampleTest.cs @@ -371,7 +371,7 @@ public void BivariateLinearRegressionNullDistribution () { Assert.IsTrue(TestUtilities.IsNearlyEqual( result.R.Probability, result.F.Probability, - new EvaluationSettings() { RelativePrecision = 1.0E-14, 
AbsolutePrecision = 1.0E-16 } + new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 } )); } diff --git a/Test/TridiagonalMatrixTest.cs b/Test/TridiagonalMatrixTest.cs index 2e9df49..f8e7af7 100644 --- a/Test/TridiagonalMatrixTest.cs +++ b/Test/TridiagonalMatrixTest.cs @@ -1,55 +1,15 @@ using System; + using Microsoft.VisualStudio.TestTools.UnitTesting; +using Meta.Numerics.Functions; using Meta.Numerics.Matrices; namespace Test { - [TestClass()] + [TestClass] public class TridiagonalMatrixTest { - - private TestContext testContextInstance; - - public TestContext TestContext { - get { - return testContextInstance; - } - set { - testContextInstance = value; - } - } - - #region Additional test attributes - // - //You can use the following additional attributes as you write your tests: - // - //Use ClassInitialize to run code before running the first test in the class - //[ClassInitialize()] - //public static void MyClassInitialize(TestContext testContext) - //{ - //} - // - //Use ClassCleanup to run code after all tests in a class have run - //[ClassCleanup()] - //public static void MyClassCleanup() - //{ - //} - // - //Use TestInitialize to run code before running each test - //[TestInitialize()] - //public void MyTestInitialize() - //{ - //} - // - //Use TestCleanup to run code after each test has run - //[TestCleanup()] - //public void MyTestCleanup() - //{ - //} - // - #endregion - public TridiagonalMatrix CreateRandomTridiagonalMatrix (int n) { return (CreateRandomTridiagonalMatrix(n, new Random(1))); } @@ -93,11 +53,8 @@ public void TridiagonalMatrixArithmeticTest () { TridiagonalMatrix T = CreateRandomTridiagonalMatrix(4); - //SquareMatrixTest.PrintMatrix(T); TridiagonalMatrix TA = T + T; - //SquareMatrixTest.PrintMatrix(TA); TridiagonalMatrix T2 = 2.0 * T; - //SquareMatrixTest.PrintMatrix(T2); Assert.IsTrue(TA == T2); TridiagonalMatrix TM = T - T; @@ -132,12 +89,9 @@ public void TridiagonalMatrixLUDecompositionTest () { for (int 
d = 3; d < 100; d = 2 * d) { - Console.WriteLine("d={0}", d); - TridiagonalMatrix T = CreateRandomTridiagonalMatrix(d); Assert.IsTrue(T.Dimension == d); - TridiagonalLUDecomposition LU = T.LUDecomposition(); Assert.IsTrue(LU.Dimension == d); @@ -173,6 +127,32 @@ public void TridiagonalMatrixLUDecompositionTest () { } + [TestMethod] + public void TridiagonalMatrixFibinacciDeterminant () { + + // The n X n tri-diagonal matrix with 1s on the diagonal, + // 1s on the super-diagonal, and -1s on the sub-diagonal + // has determinant equal to the (n+1)th Fibonacci number. + + foreach (int n in TestUtilities.GenerateIntegerValues(2, 128, 4)) { + + TridiagonalMatrix T = new TridiagonalMatrix(n); + for (int i = 0; i < n; i++) { + T[i, i] = 1.0; + } + for (int i = 1; i < n; i++) { + T[i - 1, i] = 1.0; + T[i, i - 1] = -1.0; + } + + Assert.IsTrue(TestUtilities.IsNearlyEqual( + T.Determinant(), + AdvancedIntegerMath.FibonacciNumber(n + 1) + )); + } + + } + }