Skip to content

Commit

Permalink
Merge pull request #39 from dcwuser/dev/examples
Browse files Browse the repository at this point in the history
Dev/examples
  • Loading branch information
dcwuser authored May 2, 2018
2 parents ccf034f + 29f1b87 commit b24446d
Show file tree
Hide file tree
Showing 13 changed files with 297 additions and 87 deletions.
2 changes: 1 addition & 1 deletion Examples/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/netcoreapp2.0/Examples.dll",
"args": ["EigenvaluesAndEigenvectors"],
"args": [],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md#console-terminal-window
"console": "internalConsole",
Expand Down
128 changes: 105 additions & 23 deletions Examples/Data.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,79 @@
using Meta.Numerics.Statistics;
using Meta.Numerics.Statistics.Distributions;

using Newtonsoft.Json;

namespace Examples {

public static class Data {

/// <summary>
/// Writes a small, three-row CSV file named "test.csv" into the current
/// working directory, for use by the import example.
/// </summary>
/// <remarks>
/// The file is opened with <see cref="File.CreateText(string)"/> so that any
/// existing "test.csv" is replaced. <see cref="File.OpenWrite(string)"/> does
/// not truncate an existing file, so a longer previous version would leave
/// stale bytes after the newly written content and corrupt the CSV.
/// </remarks>
public static void ConstructTestCsv () {

    using (TextWriter writer = File.CreateText("test.csv")) {
        // Header row, followed by one line per record.
        writer.WriteLine("Id, Name, Sex, Birthdate, Height, Weight, Result");
        writer.WriteLine("1, John, M, 1970-01-02, 190.0, 75.0, True");
        writer.WriteLine("2, Mary, F, 1980-02-03, 155.0, 40.0, True");
        writer.WriteLine("3, Luke, M, 1990-03-04, 180.0, 60.0, False");
    }

}

/// <summary>
/// Demonstrates several ways of getting data into a <c>FrameTable</c>:
/// from a local CSV file, from a CSV file fetched over HTTP, from JSON
/// fetched over HTTP, and by defining a schema and adding rows in code.
/// </summary>
/// <remarks>
/// Reads "test.csv" from the current working directory and downloads two
/// files from raw.githubusercontent.com, so it needs that file to exist and
/// network access to be available.
/// </remarks>
[ExampleMethod]
public static void ImportingData () {

    // Import from a local CSV file.
    FrameTable data;
    using (TextReader reader = File.OpenText("test.csv")) {
        data = FrameTable.FromCsv(reader);
    }

    Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
    Console.WriteLine("The names and types of the columns are:");
    foreach (FrameColumn column in data.Columns) {
        Console.WriteLine($" {column.Name} of type {column.StorageType}");
    }

    // Import from a CSV file fetched over the network.
    FrameTable titanic;
    Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse()) {
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            titanic = FrameTable.FromCsv(reader);
        }
    }

    // Import from JSON: download the text, deserialize it into a list of
    // dictionaries, and build the table from those dictionaries.
    Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
    string input;
    // WebClient is disposable; release it as soon as the download is done.
    using (WebClient client = new WebClient()) {
        input = client.DownloadString(jsonUrl);
    }
    List<Dictionary<string,object>> output = JsonConvert.DeserializeObject<List<Dictionary<string,object>>>(input);
    FrameTable jsonExample = FrameTable.FromDictionaries(output);

    // Define the schema.
    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Add rows as arrays of objects, in column order.
    table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
    table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

    // Add a row using a dictionary. This is more verbose, but very clear.
    table.AddRow(new Dictionary<string,object>(){
        {"Id", 3},
        {"Name", null},
        {"Sex", "M"},
        {"Birthdate", DateTime.Parse("1990-03-04")},
        {"Height", 180.0},
        {"Weight", 60.0},
        {"Result", false}
    });

}

[ExampleMethod]
public static void ManipulatingData () {

Expand Down Expand Up @@ -82,8 +151,18 @@ public static void AnalyzingData () {
table = FrameTable.FromCsv(reader);
}
}
FrameView view = table.WhereNotNull();

// Get the column with (zero-based) index 4.
FrameColumn column4 = view.Columns[4];
// Get the column named "Height".
FrameColumn heightsColumn = view.Columns["Height"];
// Even easier way to get the column named "Height".
FrameColumn alsoHeightsColumn = view["Height"];

SummaryStatistics summary = new SummaryStatistics(table["Height"].As<double>());
IReadOnlyList<double> heights = view["Height"].As<double>();

SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
Console.WriteLine($"Count = {summary.Count}");
Console.WriteLine($"Mean = {summary.Mean}");
Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
Expand All @@ -92,74 +171,77 @@ public static void AnalyzingData () {
Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

IReadOnlyList<double> maleHeights =
table.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
IReadOnlyList<double> femaleHeights =
table.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}, P = {test.Probability}");
Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
Console.WriteLine($"P = {test.Probability}");

TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
TestResult totalHeightNormality = table["Height"].As<double>().ShapiroFranciaTest();
TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

LinearRegressionResult fit =
table["Weight"].As<double>().LinearRegression(table["Height"].As<double>());
view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

ContingencyTable<string, bool> contingency =
Bivariate.Crosstabs(table["Sex"].As<string>(), table["Result"].As<bool>());
Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(false, "F")}");
Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

table.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0));
table.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24);
view.AddComputedColumn("Bmi", r => ((double) r["Weight"])/MoreMath.Sqr((double) r["Height"] / 100.0));
view.AddComputedColumn("Age", r=> (DateTime.Now - (DateTime) r["Birthdate"]).TotalDays / 365.24);

MultiLinearLogisticRegressionResult result =
table["Result"].As<bool>().MultiLinearLogisticRegression(
table["Bmi"].As<double>(),
table["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
view["Result"].As<bool>().MultiLinearLogisticRegression(
view["Bmi"].As<double>(),
view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
);
foreach (Parameter parameter in result.Parameters) {
Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
}

//TestResult ageResultPearson = Bivariate.PearsonRTest(table["Age"].As<double>(), table["Result"].As<double>());
TestResult spearman = Bivariate.SpearmanRhoTest(table["Age"].As<double>(), table["Result"].As<double>());
TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");

}

public static void ConstructData () {
public static void ConstructExampleData () {

FrameTable table = new FrameTable();
table.AddColumn<int>("Id");
table.AddColumn<string>("Name");
table.AddColumn<string>("Sex");
table.AddColumn<DateTime>("Birthdate");
table.AddColumns<double>("Height", "Weight");
table.AddColumn<double>("Height");
table.AddColumns<double?>("Weight");
table.AddColumn<bool>("Result");

//Random rng = new Random(3);
//Random rng = new Random(314159);
// Random rng = new Random(271828);
Random rng = new Random(1000001);

//string[] maleNames = new string[1024];
string[] maleNames = new string[] {"Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

//string[] femaleNames = new string[1024];
string[] femaleNames = new string[] {"Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

string path = @"C:\Users\dawright\Documents\example.csv";
// add rows with nulls
table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

string path = @"example.csv";
using (StreamWriter writer = new StreamWriter(File.OpenWrite(path))) {
table.ToCsv(writer);
}
Console.WriteLine(File.Exists(path));

string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);
File.WriteAllText("example.json", json);

}

private static void AddRows(FrameTable table, IReadOnlyList<string> names, string sex, double meanHeight, double stddevHeight, double meanBmi, double stddevBmi, int flag, Random rng) {
Expand Down
102 changes: 102 additions & 0 deletions Examples/Distributions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using System;

using Meta.Numerics;
using Meta.Numerics.Analysis;
using Meta.Numerics.Functions;
using Meta.Numerics.Statistics.Distributions;

namespace Examples {

/// <summary>
/// Examples showing how to work with continuous and discrete probability
/// distributions: densities and masses, cumulative probabilities and their
/// inverses, expectation values, and moments.
/// </summary>
public static class Distributions {

    /// <summary>
    /// Prints density, cumulative-probability, inverse-probability and
    /// expectation-value results for a Gumbel distribution (continuous) and
    /// a binomial distribution (discrete).
    /// </summary>
    [ExampleMethod]
    public static void DistributionFunctions () {

        ContinuousDistribution gumbel = new GumbelDistribution();

        // Use PDF to compute absolute deviation
        IntegrationResult r = FunctionMath.Integrate(
            z => gumbel.ProbabilityDensity(z) * Math.Abs(z - gumbel.Mean),
            gumbel.Support
        );
        Console.WriteLine($"mean absolute deviation = {r.Value}");

        // Shorter form
        double gumbelMad = gumbel.ExpectationValue(z => Math.Abs(z - gumbel.Mean));
        Console.WriteLine($"mean absolute deviation = {gumbelMad}");

        double x = 1.5;

        // PDF
        Console.WriteLine($"p({x}) = {gumbel.ProbabilityDensity(x)}");

        // CDF, aka percentile
        double P = gumbel.LeftProbability(x);
        Console.WriteLine($"P({x}) = {P}");

        // Right CDF
        double Q = gumbel.RightProbability(x);
        Console.WriteLine($"Q({x}) = {Q}");

        Console.WriteLine($"P + Q = {P + Q}");

        // Far tail
        double xt = 100.0;
        double qt = gumbel.RightProbability(xt);
        Console.WriteLine($"Q({xt}) = {qt}");

        // Inverse CDF, aka quantile
        Console.WriteLine($"PI({P}) = {gumbel.InverseLeftProbability(P)}");
        Console.WriteLine($"QI({qt}) = {gumbel.InverseRightProbability(qt)}");


        DiscreteDistribution binomial = new BinomialDistribution(0.4, 8);

        Console.WriteLine($"support {binomial.Support}");

        // Probability mass at a single value
        int k = 4;
        Console.WriteLine($"P({k}) = {binomial.ProbabilityMass(k)}");

        double binomialMad = binomial.ExpectationValue(i => Math.Abs(i - binomial.Mean));
        Console.WriteLine($"mean absolute deviation = {binomialMad}");

        // For discrete distributions, exclusive and inclusive bounds differ
        Console.WriteLine($"P(k < {k}) = {binomial.LeftExclusiveProbability(k)}");
        Console.WriteLine($"P(k <= {k}) = {binomial.LeftInclusiveProbability(k)}");
        Console.WriteLine($"P(k > {k}) = {binomial.RightExclusiveProbability(k)}");

        // Inverse CDF, then print the probabilities on either side of the result
        int k0 = binomial.InverseLeftProbability(0.5);
        Console.WriteLine($"min k0 to achieve P(k <= k0) > 0.5: {k0}");
        Console.WriteLine($"P(k < {k0}) = {binomial.LeftExclusiveProbability(k0)}");
        Console.WriteLine($"P(k <= {k0}) = {binomial.LeftInclusiveProbability(k0)}");

    }


    /// <summary>
    /// Prints the support, mean, variance, standard deviation, skewness and
    /// excess kurtosis of a Poisson distribution, followed by its raw
    /// moments, central moments and cumulants of orders 0 through 4.
    /// </summary>
    [ExampleMethod]
    public static void DistributionMoments () {

        DiscreteDistribution d = new PoissonDistribution(5);
        Console.WriteLine($"support = {d.Support}");

        // Each summary property is printed next to the same quantity
        // computed as an explicit expectation value.
        Console.WriteLine($"mean = {d.Mean}");
        Console.WriteLine($"mean as expectation = {d.ExpectationValue(x => x)}");

        Console.WriteLine($"variance = {d.Variance}");
        Console.WriteLine($"variance as expectation = {d.ExpectationValue(x => MoreMath.Sqr(x - d.Mean))}");

        Console.WriteLine($"standard deviation = {d.StandardDeviation}");
        Console.WriteLine($"skewness = {d.Skewness}");
        Console.WriteLine($"excess kurtosis = {d.ExcessKurtosis}");

        // Raw moments (M), central moments (C) and cumulants (K) by order
        for (int r = 0; r <= 4; r++) {
            Console.WriteLine($"M_{r} = {d.RawMoment(r)}");
            Console.WriteLine($"C_{r} = {d.CentralMoment(r)}");
            Console.WriteLine($"K_{r} = {d.Cumulant(r)}");
        }

    }

}

}
1 change: 1 addition & 0 deletions Examples/Examples.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Meta.Numerics" Version="4.0.5-alpha"/>
<PackageReference Include="Newtonsoft.Json" Version="11.0.2"/>
</ItemGroup>
</Project>
1 change: 0 additions & 1 deletion Examples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ private static MethodInfo[] GetExampleMethods () {

static void Main(string[] args)
{

MethodInfo[] methods = GetExampleMethods();
Dictionary<string, MethodInfo> index = new Dictionary<string, MethodInfo>();
foreach (MethodInfo method in methods) {
Expand Down
30 changes: 30 additions & 0 deletions Examples/Statistics.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
using System;
using System.Linq;
using System.Collections.Generic;

using Meta.Numerics;
using Meta.Numerics.Matrices;
using Meta.Numerics.Statistics;
using Meta.Numerics.Statistics.Distributions;

namespace Examples {

Expand Down Expand Up @@ -133,6 +135,34 @@ public static void ContingencyTable () {

}

/// <summary>
/// Draws 500 values from a Weibull distribution using a fixed random seed,
/// then fits the sample three ways: with the built-in Weibull fit, with the
/// built-in lognormal fit, and with a general maximum likelihood fit against
/// a Weibull model. The fitted values are printed for each.
/// </summary>
[ExampleMethod]
public static void FitToDistribution () {

    // Generate the sample. The seed is fixed so the output is repeatable.
    WeibullDistribution source = new WeibullDistribution(3.0, 1.5);
    Random generator = new Random(7);
    List<double> sample = new List<double>(source.GetRandomValues(generator, 500));

    // Fit to the same family the sample was drawn from.
    WeibullFitResult weibullFit = sample.FitToWeibull();
    Console.WriteLine($"Best fit scale: {weibullFit.Scale}");
    Console.WriteLine($"Best fit shape: {weibullFit.Shape}");
    Console.WriteLine($"Probability of fit: {weibullFit.GoodnessOfFit.Probability}");

    // Fit to a different family for comparison.
    LognormalFitResult lognormalFit = sample.FitToLognormal();
    Console.WriteLine($"Best fit mu: {lognormalFit.Mu}");
    Console.WriteLine($"Best fit sigma: {lognormalFit.Sigma}");
    Console.WriteLine($"Probability of fit: {lognormalFit.GoodnessOfFit.Probability}");

    // General maximum likelihood fit: supply a factory that builds the model
    // distribution from named parameters, plus starting values for them.
    var startingValues = new Dictionary<string, double>() { {"Scale", 1.0}, {"Shape", 1.0}};
    var mlFit = sample.MaximumLikelihoodFit(
        p => new WeibullDistribution(p["Scale"], p["Shape"]),
        startingValues
    );
    foreach (Parameter fitted in mlFit.Parameters) {
        Console.WriteLine($"{fitted.Name} = {fitted.Estimate}");
    }

}

}

}
Loading

0 comments on commit b24446d

Please sign in to comment.