Skip to content

Commit

Permalink
Doc changes and tridiagonal determinant test.
Browse files Browse the repository at this point in the history
  • Loading branch information
dcwuser committed May 2, 2018
1 parent a109110 commit 29f1b87
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 87 deletions.
2 changes: 1 addition & 1 deletion Examples/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/netcoreapp2.0/Examples.dll",
"args": ["FitToDistribution"],
"args": [],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md#console-terminal-window
"console": "internalConsole",
Expand Down
128 changes: 105 additions & 23 deletions Examples/Data.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,79 @@
using Meta.Numerics.Statistics;
using Meta.Numerics.Statistics.Distributions;

using Newtonsoft.Json;

namespace Examples {

public static class Data {

/// <summary>
/// Writes a small sample CSV file named <c>test.csv</c>, consisting of a header
/// line and three data rows, using a path relative to the current working directory.
/// </summary>
/// <remarks>
/// Any existing <c>test.csv</c> is replaced in full, so the resulting file always
/// contains exactly the four lines written here.
/// </remarks>
public static void ConstructTestCsv () {

    // File.Create truncates a pre-existing file. File.OpenWrite would not, so a
    // longer, older test.csv would leave stale bytes after the new content.
    using (TextWriter writer = new StreamWriter(File.Create("test.csv"))) {
        writer.WriteLine("Id, Name, Sex, Birthdate, Height, Weight, Result");
        writer.WriteLine("1, John, M, 1970-01-02, 190.0, 75.0, True");
        writer.WriteLine("2, Mary, F, 1980-02-03, 155.0, 40.0, True");
        writer.WriteLine("3, Luke, M, 1990-03-04, 180.0, 60.0, False");
    }

}

/// <summary>
/// Demonstrates several ways of getting data into a <see cref="FrameTable"/>:
/// from a local CSV file, from a CSV file fetched over HTTP, from JSON fetched
/// over HTTP and deserialized into dictionaries, and by defining columns and
/// adding rows programmatically.
/// </summary>
/// <remarks>
/// Reads <c>test.csv</c> via a relative path, so that file must already exist
/// (for instance, as written by <see cref="ConstructTestCsv"/>). The two remote
/// examples require network access.
/// </remarks>
[ExampleMethod]
public static void ImportingData () {

    // Import from a local CSV file.
    FrameTable data;
    using (TextReader reader = File.OpenText("test.csv")) {
        data = FrameTable.FromCsv(reader);
    }

    Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
    Console.WriteLine("The names and types of the columns are:");
    foreach (FrameColumn column in data.Columns) {
        Console.WriteLine($"  {column.Name} of type {column.StorageType}");
    }

    // Import from a CSV file fetched over HTTP.
    FrameTable titanic;
    Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse()) {
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            titanic = FrameTable.FromCsv(reader);
        }
    }

    // Import from JSON: download the text, deserialize it into a list of
    // dictionaries, and build the table from those dictionaries.
    Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
    string input;
    // WebClient is disposable; release it as soon as the download completes.
    using (WebClient client = new WebClient()) {
        input = client.DownloadString(jsonUrl);
    }
    List<Dictionary<string,object>> output = JsonConvert.DeserializeObject<List<Dictionary<string,object>>>(input);
    FrameTable jsonExample = FrameTable.FromDictionaries(output);

    // Define the schema.
    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Add rows as arrays of objects, given in column order.
    table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
    table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

    // Add a row using a dictionary. This is more verbose, but very clear.
    table.AddRow(new Dictionary<string,object>(){
        {"Id", 3},
        {"Name", null},
        {"Sex", "M"},
        {"Birthdate", DateTime.Parse("1990-03-04")},
        {"Height", 180.0},
        {"Weight", 60.0},
        {"Result", false}
    });

}

[ExampleMethod]
public static void ManipulatingData () {

Expand Down Expand Up @@ -82,8 +151,18 @@ public static void AnalyzingData () {
table = FrameTable.FromCsv(reader);
}
}
FrameView view = table.WhereNotNull();

// Get the column with (zero-based) index 4.
FrameColumn column4 = view.Columns[4];
// Get the column named "Height".
FrameColumn heightsColumn = view.Columns["Height"];
// Even easier way to get the column named "Height".
FrameColumn alsoHeightsColumn = view["Height"];

SummaryStatistics summary = new SummaryStatistics(table["Height"].As<double>());
IReadOnlyList<double> heights = view["Height"].As<double>();

SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
Console.WriteLine($"Count = {summary.Count}");
Console.WriteLine($"Mean = {summary.Mean}");
Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
Expand All @@ -92,74 +171,77 @@ public static void AnalyzingData () {
Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

IReadOnlyList<double> maleHeights =
table.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
IReadOnlyList<double> femaleHeights =
table.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}, P = {test.Probability}");
Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
Console.WriteLine($"P = {test.Probability}");

TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
TestResult totalHeightNormality = table["Height"].As<double>().ShapiroFranciaTest();
TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

LinearRegressionResult fit =
table["Weight"].As<double>().LinearRegression(table["Height"].As<double>());
view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

ContingencyTable<string, bool> contingency =
Bivariate.Crosstabs(table["Sex"].As<string>(), table["Result"].As<bool>());
Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(false, "F")}");
Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

table.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0));
table.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24);
view.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0));
view.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24);

MultiLinearLogisticRegressionResult result =
table["Result"].As<bool>().MultiLinearLogisticRegression(
table["Bmi"].As<double>(),
table["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
view["Result"].As<bool>().MultiLinearLogisticRegression(
view["Bmi"].As<double>(),
view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
);
foreach (Parameter parameter in result.Parameters) {
Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
}

//TestResult ageResultPearson = Bivariate.PearsonRTest(table["Age"].As<double>(), table["Result"].As<double>());
TestResult spearman = Bivariate.SpearmanRhoTest(table["Age"].As<double>(), table["Result"].As<double>());
TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");

}

public static void ConstructData () {
public static void ConstructExampleData () {

FrameTable table = new FrameTable();
table.AddColumn<int>("Id");
table.AddColumn<string>("Name");
table.AddColumn<string>("Sex");
table.AddColumn<DateTime>("Birthdate");
table.AddColumns<double>("Height", "Weight");
table.AddColumn<double>("Height");
table.AddColumns<double?>("Weight");
table.AddColumn<bool>("Result");

//Random rng = new Random(3);
//Random rng = new Random(314159);
// Random rng = new Random(271828);
Random rng = new Random(1000001);

//string[] maleNames = new string[1024];
string[] maleNames = new string[] {"Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

//string[] femaleNames = new string[1024];
string[] femaleNames = new string[] {"Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

string path = @"C:\Users\dawright\Documents\example.csv";
// add rows with nulls
table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

string path = @"example.csv";
using (StreamWriter writer = new StreamWriter(File.OpenWrite(path))) {
table.ToCsv(writer);
}
Console.WriteLine(File.Exists(path));

string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);
File.WriteAllText("example.json", json);

}

private static void AddRows(FrameTable table, IReadOnlyList<string> names, string sex, double meanHeight, double stddevHeight, double meanBmi, double stddevBmi, int flag, Random rng) {
Expand Down
1 change: 1 addition & 0 deletions Examples/Examples.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Meta.Numerics" Version="4.0.5-alpha"/>
<PackageReference Include="Newtonsoft.Json" Version="11.0.2"/>
</ItemGroup>
</Project>
1 change: 0 additions & 1 deletion Examples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ private static MethodInfo[] GetExampleMethods () {

static void Main(string[] args)
{

MethodInfo[] methods = GetExampleMethods();
Dictionary<string, MethodInfo> index = new Dictionary<string, MethodInfo>();
foreach (MethodInfo method in methods) {
Expand Down
16 changes: 14 additions & 2 deletions Numerics/Analysis/EvaluationSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,24 @@
namespace Meta.Numerics.Analysis {

/// <summary>
/// Contains settings controling the evaluation of a function.
/// Contains settings governing the evaluation of a function.
/// </summary>
/// <remarks>
/// <para>Negative values of <see cref="EvaluationBudget"/>,
/// <see cref="RelativePrecision"/>, and <see cref="AbsolutePrecision"/>
/// indicate that the analysis method should use its defaults for
/// that property. Override the default for a property by
/// setting it explicitly. If you set values for some properties
/// but not others, your setting will be applied to the property
/// you set and the others will use defaults.</para>
/// <para>When an analysis method returns an <see cref="EvaluationResult"/>
/// object, its evaluation setting object will contain the specific
/// settings used, so you can see which default values were applied.</para>
/// </remarks>
public class EvaluationSettings {

/// <summary>
/// Initializes a new set of default evaulation settings.
/// Initializes a new set of default evaluation settings.
/// </summary>
public EvaluationSettings () {
evaluationBudget = -1;
Expand Down
6 changes: 3 additions & 3 deletions Numerics/Data/FrameTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ namespace Meta.Numerics.Data
// Additional flags could include: iid, series, circular

/// <summary>
/// A modify-able array of data.
/// Represents a modifiable array of data.
/// </summary>
/// <remarks>
/// <para>This is the central class for storing data in our data frame system.</para>
/// <para>Use the <see cref="FromCsv"/> method to create a frame table from a comma-separated values
/// file or the <see cref="FromDictionaries"/> method to create a frame table from JSON or another
/// collection-of-dictionaries representation. Or create one programmatically by using the
/// <see cref="FrameTable()"/> constructor to instantiate an empty data frame and
/// <see cref="AddColumn{T}(string)"/> and <see cref="AddRow(object[])"/> to add columns and rows.</para>
/// <see cref="AddColumn{T}(string)" autoUpgrade="true"/> and <see cref="AddRow(object[])" autoUpgrade="true"/> to add columns and rows.</para>
/// <para>Using the methods inherited from the <see cref="FrameView"/> class, you can filter, re-order,
/// manipulate, and analyze data without incurring the space or time costs of copying the stored data.</para>
/// </remarks>
Expand Down Expand Up @@ -165,7 +165,7 @@ public void AddColumn<T>(string name, List<T> storage) {
}

/// <summary>
/// Adds the new columns with the given names.
/// Adds new columns with the given names.
/// </summary>
/// <typeparam name="T">The type of the columns.</typeparam>
/// <param name="names">The names of the columns.</param>
Expand Down
10 changes: 7 additions & 3 deletions Numerics/Data/FrameView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public enum SortOrder {


/// <summary>
/// A read-only view of an array of data.
/// Represents a read-only view of an array of data.
/// </summary>
/// <remarks>
/// <para>This is the central class for viewing data in our data frame system.</para>
Expand All @@ -50,10 +50,14 @@ public enum SortOrder {
/// <see cref="this[string]"/> accessor to get a column, together with the <see cref="FrameColumn.As{T}"/>
/// caster to expose it as a collection of the required type.
/// For example, to obtain an estimate of the mean of the population from the sample in the
/// column named "heights", write <tt>view["height"].As&lt;double&gt;().PopulationMean()</tt>.</para>
/// column named "height", write <tt>view["height"].As&lt;double&gt;().PopulationMean()</tt>.
/// Note that, for this to succeed, the underlying storage type of the heights column need not be double. As
/// long as the data are convertible to the target type, no problems will arise. For example,
/// the underlying storage type might be int, or double? as long as no null values are present in the view.</para>
/// <para>To create the original array of data that will be manipulated, use the <see cref="FrameTable"/>
/// class. Note that, because the underlying data is not copied when a new view is generated, changes
/// to the original table may not be reflected in the views that have been generated from it.</para>
/// to the original table may have unexpected consequences for the views linked to it. Best practice
/// is not to change the underlying data after generating views based on it.</para>
/// <para>You can export a view to CSV or JSON formats using the <see cref="ToCsv(TextWriter)"/>
/// and <see cref="ToDictionaries"/> methods.</para>
/// </remarks>
Expand Down
4 changes: 2 additions & 2 deletions Numerics/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@
//
// You can specify all the values or you can default the Revision and Build Numbers
// by using the '*' as shown below:
[assembly: AssemblyVersion("4.0.5.0")]
[assembly: AssemblyFileVersion("4.0.5.0")]
[assembly: AssemblyVersion("4.0.7.0")]
[assembly: AssemblyFileVersion("4.0.7.0")]

4 changes: 2 additions & 2 deletions Numerics/Statistics/Distributions/PearsonRDistribution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public override double ProbabilityDensity (double x) {
if (Math.Abs(x) > 1.0) {
return (0.0);
} else {
return (Math.Pow(1.0 - x * x, (n - 4) / 2.0) / AdvancedMath.Beta(0.5, (n - 2) / 2.0));
return (Math.Pow((1.0 - x) * (1.0 + x), (n - 4) / 2.0) / AdvancedMath.Beta(0.5, (n - 2) / 2.0));
}
}

Expand Down Expand Up @@ -89,7 +89,7 @@ public override double LeftProbability (double x) {
if (x <= -1.0) {
return (0.0);
} else if (x < 0.0) {
return (AdvancedMath.Beta((n - 2) / 2.0, 0.5, 1.0 - x * x) / AdvancedMath.Beta((n-2) / 2.0, 0.5) / 2.0);
return (AdvancedMath.Beta((n - 2) / 2.0, 0.5, (1.0 - x) * (1.0 + x)) / AdvancedMath.Beta((n-2) / 2.0, 0.5) / 2.0);
} else if (x < 1.0) {
return ((1.0 + AdvancedMath.Beta(0.5, (n - 2) / 2.0, x * x) / AdvancedMath.Beta(0.5, (n-2) / 2.0)) / 2.0);
} else {
Expand Down
2 changes: 1 addition & 1 deletion Test/BivariateSampleTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ public void BivariateLinearRegressionNullDistribution () {

Assert.IsTrue(TestUtilities.IsNearlyEqual(
result.R.Probability, result.F.Probability,
new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
));

}
Expand Down
Loading

0 comments on commit 29f1b87

Please sign in to comment.