diff --git a/Sources/Accord.Statistics/Analysis/LogisticRegressionAnalysis.cs b/Sources/Accord.Statistics/Analysis/LogisticRegressionAnalysis.cs
index b5a52c181..9693616c0 100644
--- a/Sources/Accord.Statistics/Analysis/LogisticRegressionAnalysis.cs
+++ b/Sources/Accord.Statistics/Analysis/LogisticRegressionAnalysis.cs
@@ -153,7 +153,9 @@ public class LogisticRegressionAnalysis : IRegressionAnalysis
private string outputName;
private double[,] sourceMatrix;
- private double[] result;
+ private double[] result;
+
+ double regularization = 1e-10;
private LogisticCoefficientCollection coefficientCollection;
@@ -176,6 +178,7 @@ public LogisticRegressionAnalysis(double[][] inputs, double[] outputs)
// Initial argument checking
if (inputs == null)
throw new ArgumentNullException("inputs");
+
if (outputs == null)
throw new ArgumentNullException("outputs");
@@ -264,6 +267,18 @@ public double Tolerance
{
get { return tolerance; }
set { tolerance = value; }
+ }
+
+ ///
+ /// Gets or sets the regularization value to be
+ /// added in the objective function. Default is
+ /// 1e-10.
+ ///
+ ///
+ public double Regularization
+ {
+ get { return regularization; }
+ set { regularization = value; }
}
///
@@ -537,7 +552,9 @@ private bool compute()
double delta;
int iteration = 0;
- var learning = new IterativeReweightedLeastSquares(regression);
+ var learning = new IterativeReweightedLeastSquares(regression);
+
+ learning.Regularization = regularization;
do // learning iterations until convergence
{
@@ -564,7 +581,9 @@ private void computeInner()
// Perform likelihood-ratio tests against diminished nested models
var innerModel = new LogisticRegression(inputCount - 1);
- var learning = new IterativeReweightedLeastSquares(innerModel);
+ var learning = new IterativeReweightedLeastSquares(innerModel);
+
+ learning.Regularization = regularization;
for (int i = 0; i < inputCount; i++)
{
diff --git a/Sources/Accord.Statistics/Models/Regression/Nonlinear/Fitting/IterativeReweightedLeastSquares.cs b/Sources/Accord.Statistics/Models/Regression/Nonlinear/Fitting/IterativeReweightedLeastSquares.cs
index 7d754df59..944d71f9e 100644
--- a/Sources/Accord.Statistics/Models/Regression/Nonlinear/Fitting/IterativeReweightedLeastSquares.cs
+++ b/Sources/Accord.Statistics/Models/Regression/Nonlinear/Fitting/IterativeReweightedLeastSquares.cs
@@ -1,483 +1,472 @@
-// Accord Statistics Library
-// The Accord.NET Framework
-// http://accord-framework.net
-//
-// Copyright © César Souza, 2009-2015
-// cesarsouza at gmail.com
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-//
-
-namespace Accord.Statistics.Models.Regression.Fitting
-{
- using System;
- using Accord.Math;
- using Accord.Math.Decompositions;
- using Accord.Statistics.Links;
-
- ///
- /// Iterative Reweighted Least Squares for Logistic Regression fitting.
- ///
- ///
- ///
- ///
- /// The Iterative Reweighted Least Squares is an iterative technique based
- /// on the Newton-Raphson iterative optimization scheme. The IRLS method uses
- /// a local quadratic approximation to the log-likelihood function.
- ///
- /// By applying the Newton-Raphson optimization scheme to the cross-entropy
- /// error function (defined as the negative logarithm of the likelihood), one
- /// arises at a weighted formulation for the Hessian matrix.
- ///
- ///
- /// The Iterative Reweighted Least Squares algorithm can also be used to learn
- /// arbitrary generalized linear models. However, the use of this class to learn
- /// such models is currently experimental.
- ///
- ///
- ///
- /// References:
- ///
- /// -
- /// Bishop, Christopher M.; Pattern Recognition and Machine Learning.
- /// Springer; 1st ed. 2006.
- /// -
- /// Amos Storkey. (2005). Learning from Data: Learning Logistic Regressors. School of Informatics.
- /// Available on: http://www.inf.ed.ac.uk/teaching/courses/lfd/lectures/logisticlearn-print.pdf
- /// -
- /// Cosma Shalizi. (2009). Logistic Regression and Newton's Method. Available on:
- /// http://www.stat.cmu.edu/~cshalizi/350/lectures/26/lecture-26.pdf
- /// -
- /// Edward F. Conor. Logistic Regression. Website. Available on:
- /// http://userwww.sfsu.edu/~efc/classes/biol710/logistic/logisticreg.htm
- ///
- ///
- ///
- ///
- ///
- /// // Suppose we have the following data about some patients.
- /// // The first variable is continuous and represent patient
- /// // age. The second variable is dichotomic and give whether
- /// // they smoke or not (This is completely fictional data).
- /// double[][] input =
- /// {
- /// new double[] { 55, 0 }, // 0 - no cancer
- /// new double[] { 28, 0 }, // 0
- /// new double[] { 65, 1 }, // 0
- /// new double[] { 46, 0 }, // 1 - have cancer
- /// new double[] { 86, 1 }, // 1
- /// new double[] { 56, 1 }, // 1
- /// new double[] { 85, 0 }, // 0
- /// new double[] { 33, 0 }, // 0
- /// new double[] { 21, 1 }, // 0
- /// new double[] { 42, 1 }, // 1
- /// };
- ///
- /// // We also know if they have had lung cancer or not, and
- /// // we would like to know whether smoking has any connection
- /// // with lung cancer (This is completely fictional data).
- /// double[] output =
- /// {
- /// 0, 0, 0, 1, 1, 1, 0, 0, 0, 1
- /// };
- ///
- ///
- /// // To verify this hypothesis, we are going to create a logistic
- /// // regression model for those two inputs (age and smoking).
- /// LogisticRegression regression = new LogisticRegression(inputs: 2);
- ///
- /// // Next, we are going to estimate this model. For this, we
- /// // will use the Iteratively Reweighted Least Squares method.
- /// var teacher = new IterativeReweightedLeastSquares(regression);
- ///
- /// // Now, we will iteratively estimate our model. The Run method returns
- /// // the maximum relative change in the model parameters and we will use
- /// // it as the convergence criteria.
- ///
- /// double delta = 0;
- /// do
- /// {
- /// // Perform an iteration
- /// delta = teacher.Run(input, output);
- ///
- /// } while (delta > 0.001);
- ///
- /// // At this point, we can compute the odds ratio of our variables.
- /// // In the model, the variable at 0 is always the intercept term,
- /// // with the other following in the sequence. Index 1 is the age
- /// // and index 2 is whether the patient smokes or not.
- ///
- /// // For the age variable, we have that individuals with
- /// // higher age have 1.021 greater odds of getting lung
- /// // cancer controlling for cigarette smoking.
- /// double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701
- ///
- /// // For the smoking/non smoking category variable, however, we
- /// // have that individuals who smoke have 5.858 greater odds
- /// // of developing lung cancer compared to those who do not
- /// // smoke, controlling for age (remember, this is completely
- /// // fictional and for demonstration purposes only).
- /// double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331
- ///
- ///
- ///
- public class IterativeReweightedLeastSquares : IRegressionFitting
- {
-
- private GeneralizedLinearRegression regression;
-
- private int parameterCount;
-
- private double[,] hessian;
- private double[] gradient;
- private double[] previous;
-
- private double lambda = 1e-10;
-
-
- private bool computeStandardErrors = true;
- private ISolverMatrixDecomposition decomposition;
-
-
- ///
- /// Gets the previous values for the coefficients which were
- /// in place before the last learning iteration was performed.
- ///
- ///
- public double[] Previous { get { return previous; } }
-
- ///
- /// Gets the current values for the coefficients.
- ///
- ///
- public double[] Solution { get { return regression.Coefficients; } }
-
- ///
- /// Gets the Hessian matrix computed in
- /// the last Newton-Raphson iteration.
- ///
- ///
- public double[,] Hessian { get { return hessian; } }
-
- ///
- /// Gets the Gradient vector computed in
- /// the last Newton-Raphson iteration.
- ///
- ///
- public double[] Gradient { get { return gradient; } }
-
- ///
- /// Gets the total number of parameters in the model.
- ///
- ///
- public int Parameters { get { return parameterCount; } }
-
-
- ///
- /// Gets or sets a value indicating whether standard
- /// errors should be computed in the next iteration.
- ///
- ///
- /// true to compute standard errors; otherwise, false.
- ///
- ///
- public bool ComputeStandardErrors
- {
- get { return computeStandardErrors; }
- set { computeStandardErrors = value; }
- }
-
- ///
- /// Gets or sets the regularization value to be
- /// added in the objective function. Default is
- /// 1e-10.
- ///
- ///
- public double Regularization
- {
- get { return lambda; }
- set { lambda = value; }
- }
-
- ///
- /// Constructs a new Iterative Reweighted Least Squares.
- ///
- ///
- /// The regression to estimate.
- ///
- public IterativeReweightedLeastSquares(LogisticRegression regression)
- {
- constructor(GeneralizedLinearRegression.FromLogisticRegression(regression, makeCopy: false));
- }
-
- ///
- /// Constructs a new Iterative Reweighted Least Squares.
- ///
- ///
- /// The regression to estimate.
- ///
- public IterativeReweightedLeastSquares(GeneralizedLinearRegression regression)
- {
- constructor(regression);
- }
-
- private void constructor(GeneralizedLinearRegression regression)
- {
- if (regression == null)
- throw new ArgumentNullException("regression");
-
- this.regression = regression;
- this.parameterCount = regression.Coefficients.Length;
- this.hessian = new double[parameterCount, parameterCount];
- this.gradient = new double[parameterCount];
- }
-
- ///
- /// Runs one iteration of the Reweighted Least Squares algorithm.
- ///
- /// The input data.
- /// The outputs associated with each input vector.
- /// The maximum relative change in the parameters after the iteration.
- ///
- public double Run(double[][] inputs, int[] outputs)
- {
- return Run(inputs, outputs.Apply(x => x > 0 ? 1.0 : 0.0));
- }
-
- ///
- /// Runs one iteration of the Reweighted Least Squares algorithm.
- ///
- ///
- /// The input data.
- /// The outputs associated with each input vector.
- ///
- /// The maximum relative change in the parameters after the iteration.
- ///
- public double Run(double[][] inputs, int[][] outputs)
- {
- if (outputs[0].Length != 1)
- throw new ArgumentException("Function must have a single output.", "outputs");
- double[] output = new double[outputs.Length];
- for (int i = 0; i < outputs.Length; i++)
- output[i] = outputs[i][0] > 0 ? 1.0 : 0.0;
- return Run(inputs, output);
- }
-
- ///
- /// Runs one iteration of the Reweighted Least Squares algorithm.
- ///
- ///
- /// The input data.
- /// The outputs associated with each input vector.
- ///
- /// The maximum relative change in the parameters after the iteration.
- ///
- public double Run(double[][] inputs, double[][] outputs)
- {
- if (outputs[0].Length != 1)
- throw new ArgumentException("Function must have a single output.", "outputs");
-
- double[] output = new double[outputs.Length];
- for (int i = 0; i < outputs.Length; i++)
- output[i] = outputs[i][0];
-
- return Run(inputs, output);
- }
-
- ///
- /// Runs one iteration of the Reweighted Least Squares algorithm.
- ///
- /// The input data.
- /// The outputs associated with each input vector.
- /// The maximum relative change in the parameters after the iteration.
- ///
- public double Run(double[][] inputs, double[] outputs)
- {
- return Run(inputs, outputs, null);
- }
-
- ///
- /// Runs one iteration of the Reweighted Least Squares algorithm.
- ///
- ///
- /// The input data.
- /// The outputs associated with each input vector.
- /// An weight associated with each sample.
- ///
- /// The maximum relative change in the parameters after the iteration.
- ///
- public double Run(double[][] inputs, double[] outputs, double[] sampleWeights)
- {
- // Regress using Iteratively Reweighted Least Squares estimation.
-
- // References:
- // - Bishop, Christopher M.; Pattern Recognition
- // and Machine Learning. Springer; 1st ed. 2006.
-
- if (inputs.Length != outputs.Length)
- {
- throw new DimensionMismatchException("outputs",
- "The number of input vectors and their associated output values must have the same size.");
- }
-
- // Initial definitions and memory allocations
- int N = inputs.Length;
-
- double[][] design = new double[N][];
- double[] errors = new double[N];
- double[] weights = new double[N];
- double[] coefficients = this.regression.Coefficients;
- double[] deltas;
-
- // Compute the regression matrix
- for (int i = 0; i < inputs.Length; i++)
- {
- double[] row = design[i] = new double[parameterCount];
-
- row[0] = 1; // for intercept
- for (int j = 0; j < inputs[i].Length; j++)
- row[j + 1] = inputs[i][j];
- }
-
-
- // Compute errors and weighting matrix
- for (int i = 0; i < inputs.Length; i++)
- {
- double y = regression.Compute(inputs[i]);
-
- // Calculate error vector
- errors[i] = y - outputs[i];
-
- // Calculate weighting matrix
- weights[i] = regression.Link.Derivative2(y);
- }
-
- if (sampleWeights != null)
- {
- for (int i = 0; i < weights.Length; i++)
- {
- errors[i] *= sampleWeights[i];
- weights[i] *= sampleWeights[i];
- }
- }
-
-
-
- // Reset Hessian matrix and gradient
- Array.Clear(gradient, 0, gradient.Length);
- Array.Clear(hessian, 0, hessian.Length);
-
-
- // (Re-) Compute error gradient
- for (int j = 0; j < design.Length; j++)
- for (int i = 0; i < gradient.Length; i++)
- gradient[i] += design[j][i] * errors[j];
-
- // (Re-) Compute weighted "Hessian" matrix
- for (int k = 0; k < weights.Length; k++)
- {
- double[] row = design[k];
- for (int j = 0; j < row.Length; j++)
- for (int i = 0; i < row.Length; i++)
- hessian[j, i] += row[i] * row[j] * weights[k];
- }
-
-
- double w = coefficients.SquareEuclidean();
-
- if (!Double.IsInfinity(w))
- {
- for (int i = 0; i < gradient.Length; i++)
- gradient[i] -= lambda * w;
-
- for (int i = 0; i < parameterCount; i++)
- hessian[i, i] -= lambda;
- }
-
-
- // Decompose to solve the linear system. Usually the Hessian will
- // be invertible and LU will succeed. However, sometimes the Hessian
- // may be singular and a Singular Value Decomposition may be needed.
-
- // The SVD is very stable, but is quite expensive, being on average
- // about 10-15 times more expensive than LU decomposition. There are
- // other ways to avoid a singular Hessian. For a very interesting
- // reading on the subject, please see:
- //
- // - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
- // Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
- // Available in: http://gking.harvard.edu/files/help.pdf
- //
-
- // Moreover, the computation of the inverse is optional, as it will
- // be used only to compute the standard errors of the regression.
-
- // Hessian Matrix is singular, try pseudo-inverse solution
- decomposition = new SingularValueDecomposition(hessian);
- deltas = decomposition.Solve(gradient);
-
- previous = (double[])coefficients.Clone();
-
- // Update coefficients using the calculated deltas
- for (int i = 0; i < coefficients.Length; i++)
- coefficients[i] -= deltas[i];
-
-
- if (computeStandardErrors)
- {
- // Grab the regression information matrix
- double[,] inverse = decomposition.Inverse();
-
- // Calculate coefficients' standard errors
- double[] standardErrors = regression.StandardErrors;
- for (int i = 0; i < standardErrors.Length; i++)
- standardErrors[i] = Math.Sqrt(inverse[i, i]);
- }
-
-
- // Return the relative maximum parameter change
- for (int i = 0; i < deltas.Length; i++)
- deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
-
- return Matrix.Max(deltas);
- }
-
- ///
- /// Computes the sum-of-squared error between the
- /// model outputs and the expected outputs.
- ///
- ///
- /// The input data set.
- /// The output values.
- ///
- /// The sum-of-squared errors.
- ///
- public double ComputeError(double[][] inputs, double[] outputs)
- {
- double sum = 0;
-
- for (int i = 0; i < inputs.Length; i++)
- {
- double actual = regression.Compute(inputs[i]);
- double expected = outputs[i];
- double delta = actual - expected;
- sum += delta * delta;
- }
-
- return sum;
- }
-
- }
-}
+// Accord Statistics Library
+// The Accord.NET Framework
+// http://accord-framework.net
+//
+// Copyright © César Souza, 2009-2015
+// cesarsouza at gmail.com
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+//
+
+namespace Accord.Statistics.Models.Regression.Fitting
+{
+ using System;
+ using Accord.Math;
+ using Accord.Math.Decompositions;
+ using Accord.Statistics.Links;
+
+ ///
+ /// Iterative Reweighted Least Squares for Logistic Regression fitting.
+ ///
+ ///
+ ///
+ ///
+ /// The Iterative Reweighted Least Squares is an iterative technique based
+ /// on the Newton-Raphson iterative optimization scheme. The IRLS method uses
+ /// a local quadratic approximation to the log-likelihood function.
+ ///
+ /// By applying the Newton-Raphson optimization scheme to the cross-entropy
+ /// error function (defined as the negative logarithm of the likelihood), one
+ /// arises at a weighted formulation for the Hessian matrix.
+ ///
+ ///
+ /// The Iterative Reweighted Least Squares algorithm can also be used to learn
+ /// arbitrary generalized linear models. However, the use of this class to learn
+ /// such models is currently experimental.
+ ///
+ ///
+ ///
+ /// References:
+ ///
+ /// -
+ /// Bishop, Christopher M.; Pattern Recognition and Machine Learning.
+ /// Springer; 1st ed. 2006.
+ /// -
+ /// Amos Storkey. (2005). Learning from Data: Learning Logistic Regressors. School of Informatics.
+ /// Available on: http://www.inf.ed.ac.uk/teaching/courses/lfd/lectures/logisticlearn-print.pdf
+ /// -
+ /// Cosma Shalizi. (2009). Logistic Regression and Newton's Method. Available on:
+ /// http://www.stat.cmu.edu/~cshalizi/350/lectures/26/lecture-26.pdf
+ /// -
+ /// Edward F. Conor. Logistic Regression. Website. Available on:
+ /// http://userwww.sfsu.edu/~efc/classes/biol710/logistic/logisticreg.htm
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// // Suppose we have the following data about some patients.
+ /// // The first variable is continuous and represent patient
+ /// // age. The second variable is dichotomic and give whether
+ /// // they smoke or not (This is completely fictional data).
+ /// double[][] input =
+ /// {
+ /// new double[] { 55, 0 }, // 0 - no cancer
+ /// new double[] { 28, 0 }, // 0
+ /// new double[] { 65, 1 }, // 0
+ /// new double[] { 46, 0 }, // 1 - have cancer
+ /// new double[] { 86, 1 }, // 1
+ /// new double[] { 56, 1 }, // 1
+ /// new double[] { 85, 0 }, // 0
+ /// new double[] { 33, 0 }, // 0
+ /// new double[] { 21, 1 }, // 0
+ /// new double[] { 42, 1 }, // 1
+ /// };
+ ///
+ /// // We also know if they have had lung cancer or not, and
+ /// // we would like to know whether smoking has any connection
+ /// // with lung cancer (This is completely fictional data).
+ /// double[] output =
+ /// {
+ /// 0, 0, 0, 1, 1, 1, 0, 0, 0, 1
+ /// };
+ ///
+ ///
+ /// // To verify this hypothesis, we are going to create a logistic
+ /// // regression model for those two inputs (age and smoking).
+ /// LogisticRegression regression = new LogisticRegression(inputs: 2);
+ ///
+ /// // Next, we are going to estimate this model. For this, we
+ /// // will use the Iteratively Reweighted Least Squares method.
+ /// var teacher = new IterativeReweightedLeastSquares(regression);
+ ///
+ /// // Now, we will iteratively estimate our model. The Run method returns
+ /// // the maximum relative change in the model parameters and we will use
+ /// // it as the convergence criteria.
+ ///
+ /// double delta = 0;
+ /// do
+ /// {
+ /// // Perform an iteration
+ /// delta = teacher.Run(input, output);
+ ///
+ /// } while (delta > 0.001);
+ ///
+ /// // At this point, we can compute the odds ratio of our variables.
+ /// // In the model, the variable at 0 is always the intercept term,
+ /// // with the other following in the sequence. Index 1 is the age
+ /// // and index 2 is whether the patient smokes or not.
+ ///
+ /// // For the age variable, we have that individuals with
+ /// // higher age have 1.021 greater odds of getting lung
+ /// // cancer controlling for cigarette smoking.
+ /// double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701
+ ///
+ /// // For the smoking/non smoking category variable, however, we
+ /// // have that individuals who smoke have 5.858 greater odds
+ /// // of developing lung cancer compared to those who do not
+ /// // smoke, controlling for age (remember, this is completely
+ /// // fictional and for demonstration purposes only).
+ /// double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331
+ ///
+ ///
+ ///
+ public class IterativeReweightedLeastSquares : IRegressionFitting
+ {
+
+ private GeneralizedLinearRegression regression;
+
+ private int parameterCount;
+
+ private double[,] hessian;
+ private double[] gradient;
+ private double[] previous;
+
+ private double lambda = 1e-10;
+
+
+ private bool computeStandardErrors = true;
+ private ISolverMatrixDecomposition decomposition;
+
+
+ ///
+ /// Gets the previous values for the coefficients which were
+ /// in place before the last learning iteration was performed.
+ ///
+ ///
+ public double[] Previous { get { return previous; } }
+
+ ///
+ /// Gets the current values for the coefficients.
+ ///
+ ///
+ public double[] Solution { get { return regression.Coefficients; } }
+
+ ///
+ /// Gets the Hessian matrix computed in
+ /// the last Newton-Raphson iteration.
+ ///
+ ///
+ public double[,] Hessian { get { return hessian; } }
+
+ ///
+ /// Gets the Gradient vector computed in
+ /// the last Newton-Raphson iteration.
+ ///
+ ///
+ public double[] Gradient { get { return gradient; } }
+
+ ///
+ /// Gets the total number of parameters in the model.
+ ///
+ ///
+ public int Parameters { get { return parameterCount; } }
+
+
+ ///
+ /// Gets or sets a value indicating whether standard
+ /// errors should be computed in the next iteration.
+ ///
+ ///
+ /// true to compute standard errors; otherwise, false.
+ ///
+ ///
+ public bool ComputeStandardErrors
+ {
+ get { return computeStandardErrors; }
+ set { computeStandardErrors = value; }
+ }
+
+ ///
+ /// Gets or sets the regularization value to be
+ /// added in the objective function. Default is
+ /// 1e-10.
+ ///
+ ///
+ public double Regularization
+ {
+ get { return lambda; }
+ set { lambda = value; }
+ }
+
+ ///
+ /// Constructs a new Iterative Reweighted Least Squares.
+ ///
+ ///
+ /// The regression to estimate.
+ ///
+ public IterativeReweightedLeastSquares(LogisticRegression regression)
+ {
+ constructor(GeneralizedLinearRegression.FromLogisticRegression(regression, makeCopy: false));
+ }
+
+ ///
+ /// Constructs a new Iterative Reweighted Least Squares.
+ ///
+ ///
+ /// The regression to estimate.
+ ///
+ public IterativeReweightedLeastSquares(GeneralizedLinearRegression regression)
+ {
+ constructor(regression);
+ }
+
+ private void constructor(GeneralizedLinearRegression regression)
+ {
+ if (regression == null)
+ throw new ArgumentNullException("regression");
+
+ this.regression = regression;
+ this.parameterCount = regression.Coefficients.Length;
+ this.hessian = new double[parameterCount, parameterCount];
+ this.gradient = new double[parameterCount];
+ }
+
+ ///
+ /// Runs one iteration of the Reweighted Least Squares algorithm.
+ ///
+ /// The input data.
+ /// The outputs associated with each input vector.
+ /// The maximum relative change in the parameters after the iteration.
+ ///
+ public double Run(double[][] inputs, int[] outputs)
+ {
+ return Run(inputs, outputs.Apply(x => x > 0 ? 1.0 : 0.0));
+ }
+
+ ///
+ /// Runs one iteration of the Reweighted Least Squares algorithm.
+ ///
+ ///
+ /// The input data.
+ /// The outputs associated with each input vector.
+ ///
+ /// The maximum relative change in the parameters after the iteration.
+ ///
+ public double Run(double[][] inputs, int[][] outputs)
+ {
+ if (outputs[0].Length != 1)
+ throw new ArgumentException("Function must have a single output.", "outputs");
+ double[] output = new double[outputs.Length];
+ for (int i = 0; i < outputs.Length; i++)
+ output[i] = outputs[i][0] > 0 ? 1.0 : 0.0;
+ return Run(inputs, output);
+ }
+
+ ///
+ /// Runs one iteration of the Reweighted Least Squares algorithm.
+ ///
+ ///
+ /// The input data.
+ /// The outputs associated with each input vector.
+ ///
+ /// The maximum relative change in the parameters after the iteration.
+ ///
+ public double Run(double[][] inputs, double[][] outputs)
+ {
+ if (outputs[0].Length != 1)
+ throw new ArgumentException("Function must have a single output.", "outputs");
+
+ double[] output = new double[outputs.Length];
+ for (int i = 0; i < outputs.Length; i++)
+ output[i] = outputs[i][0];
+
+ return Run(inputs, output);
+ }
+
+ ///
+ /// Runs one iteration of the Reweighted Least Squares algorithm.
+ ///
+ /// The input data.
+ /// The outputs associated with each input vector.
+ /// The maximum relative change in the parameters after the iteration.
+ ///
+ public double Run(double[][] inputs, double[] outputs)
+ {
+ return Run(inputs, outputs, null);
+ }
+
+ ///
+ /// Runs one iteration of the Reweighted Least Squares algorithm.
+ ///
+ ///
+ /// The input data.
+ /// The outputs associated with each input vector.
+ /// An weight associated with each sample.
+ ///
+ /// The maximum relative change in the parameters after the iteration.
+ ///
+ public double Run(double[][] inputs, double[] outputs, double[] sampleWeights)
+ {
+ // Regress using Iteratively Reweighted Least Squares estimation.
+
+ // References:
+ // - Bishop, Christopher M.; Pattern Recognition
+ // and Machine Learning. Springer; 1st ed. 2006.
+
+ if (inputs.Length != outputs.Length)
+ {
+ throw new DimensionMismatchException("outputs",
+ "The number of input vectors and their associated output values must have the same size.");
+ }
+
+ // Initial definitions and memory allocations
+ int N = inputs.Length;
+
+ double[][] design = new double[N][];
+ double[] errors = new double[N];
+ double[] weights = new double[N];
+ double[] coefficients = this.regression.Coefficients;
+ double[] deltas;
+
+ // Compute the regression matrix
+ for (int i = 0; i < inputs.Length; i++)
+ {
+ double[] row = design[i] = new double[parameterCount];
+
+ row[0] = 1; // for intercept
+ for (int j = 0; j < inputs[i].Length; j++)
+ row[j + 1] = inputs[i][j];
+ }
+
+
+ // Compute errors and weighting matrix
+ for (int i = 0; i < inputs.Length; i++)
+ {
+ double y = regression.Compute(inputs[i]);
+
+ // Calculate error vector
+ errors[i] = y - outputs[i];
+
+ // Calculate weighting matrix
+ weights[i] = regression.Link.Derivative2(y);
+ }
+
+ if (sampleWeights != null)
+ {
+ for (int i = 0; i < weights.Length; i++)
+ {
+ errors[i] *= sampleWeights[i];
+ weights[i] *= sampleWeights[i];
+ }
+ }
+
+
+
+ // Reset Hessian matrix and gradient
+ Array.Clear(gradient, 0, gradient.Length);
+ Array.Clear(hessian, 0, hessian.Length);
+
+
+ // (Re-) Compute error gradient
+ for (int j = 0; j < design.Length; j++)
+ for (int i = 0; i < gradient.Length; i++)
+ gradient[i] += design[j][i] * errors[j] + lambda * coefficients[i];
+
+ // (Re-) Compute weighted "Hessian" matrix
+ for (int k = 0; k < weights.Length; k++)
+ {
+ double[] row = design[k];
+ for (int j = 0; j < row.Length; j++)
+ for (int i = 0; i < row.Length; i++)
+ hessian[j, i] += row[i] * row[j] * weights[k];
+ }
+
+
+
+ // Decompose to solve the linear system. Usually the Hessian will
+ // be invertible and LU will succeed. However, sometimes the Hessian
+ // may be singular and a Singular Value Decomposition may be needed.
+
+ // The SVD is very stable, but is quite expensive, being on average
+ // about 10-15 times more expensive than LU decomposition. There are
+ // other ways to avoid a singular Hessian. For a very interesting
+ // reading on the subject, please see:
+ //
+ // - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
+ // Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
+ // Available in: http://gking.harvard.edu/files/help.pdf
+ //
+
+ // Moreover, the computation of the inverse is optional, as it will
+ // be used only to compute the standard errors of the regression.
+
+ // Hessian Matrix is singular, try pseudo-inverse solution
+ decomposition = new SingularValueDecomposition(hessian);
+ deltas = decomposition.Solve(gradient);
+
+ previous = (double[])coefficients.Clone();
+
+ // Update coefficients using the calculated deltas
+ for (int i = 0; i < coefficients.Length; i++)
+ coefficients[i] -= deltas[i];
+
+
+ if (computeStandardErrors)
+ {
+ // Grab the regression information matrix
+ double[,] inverse = decomposition.Inverse();
+
+ // Calculate coefficients' standard errors
+ double[] standardErrors = regression.StandardErrors;
+ for (int i = 0; i < standardErrors.Length; i++)
+ standardErrors[i] = Math.Sqrt(inverse[i, i]);
+ }
+
+
+ // Return the relative maximum parameter change
+ for (int i = 0; i < deltas.Length; i++)
+ deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
+
+ return Matrix.Max(deltas);
+ }
+
+ ///
+ /// Computes the sum-of-squared error between the
+ /// model outputs and the expected outputs.
+ ///
+ ///
+ /// The input data set.
+ /// The output values.
+ ///
+ /// The sum-of-squared errors.
+ ///
+ public double ComputeError(double[][] inputs, double[] outputs)
+ {
+ double sum = 0;
+
+ for (int i = 0; i < inputs.Length; i++)
+ {
+ double actual = regression.Compute(inputs[i]);
+ double expected = outputs[i];
+ double delta = actual - expected;
+ sum += delta * delta;
+ }
+
+ return sum;
+ }
+
+ }
+}
diff --git a/Unit Tests/Accord.Tests.Statistics/Accord.Tests.Statistics.csproj b/Unit Tests/Accord.Tests.Statistics/Accord.Tests.Statistics.csproj
index 93e9b6f00..14653e57c 100644
--- a/Unit Tests/Accord.Tests.Statistics/Accord.Tests.Statistics.csproj
+++ b/Unit Tests/Accord.Tests.Statistics/Accord.Tests.Statistics.csproj
@@ -1,76 +1,76 @@
-
-
-
- {49679C95-28CE-4D35-8F38-3D67A511A3C1}
- Accord.Tests.Statistics
- Accord.Tests.Statistics
- {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
- Debug
- AnyCPU
- Library
- Properties
- v4.5
- 512
- 3.5
- true
- 0
- 1.0.0.%2a
- false
- true
-
- true
- true
- pdbonly
- AnyCPU
- prompt
- 4
- true
- true
- Accord.snk
-
-
- true
- full
- false
- bin\Debug\
- DEBUG;TRACE
-
-
- bin\Release\net35
- TRACE;NET35
-
-
- bin\Release\net40
- TRACE;NET40
-
-
- bin\Release\net45
- TRACE;NET45
-
-
-
- False
- ..\..\Externals\AForge.NET\AForge.dll
-
-
- False
- ..\..\Externals\AForge.NET\AForge.Math.dll
-
-
- False
- ..\..\Externals\AForge.NET\AForge.Neuro.dll
-
-
-
- 3.5
-
-
-
-
-
- c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE\PublicAssemblies\Microsoft.VisualStudio.QualityTools.UnitTestFramework.dll
-
-
+
+
+
+ {49679C95-28CE-4D35-8F38-3D67A511A3C1}
+ Accord.Tests.Statistics
+ Accord.Tests.Statistics
+ {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ Debug
+ AnyCPU
+ Library
+ Properties
+ v4.5
+ 512
+ 3.5
+ true
+ 0
+ 1.0.0.%2a
+ false
+ true
+
+ true
+ true
+ pdbonly
+ AnyCPU
+ prompt
+ 4
+ true
+ true
+ Accord.snk
+
+
+ true
+ full
+ false
+ bin\Debug\
+ DEBUG;TRACE
+
+
+ bin\Release\net35
+ TRACE;NET35
+
+
+ bin\Release\net40
+ TRACE;NET40
+
+
+ bin\Release\net45
+ TRACE;NET45
+
+
+
+ False
+ ..\..\Externals\AForge.NET\AForge.dll
+
+
+ False
+ ..\..\Externals\AForge.NET\AForge.Math.dll
+
+
+ False
+ ..\..\Externals\AForge.NET\AForge.Neuro.dll
+
+
+
+ 3.5
+
+
+
+
+
+ c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE\PublicAssemblies\Microsoft.VisualStudio.QualityTools.UnitTestFramework.dll
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Code
-
-
- Code
-
-
-
- True
- True
- Resources.resx
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {0AB27A20-925C-4556-9FA4-6E2C109E448E}
- Accord.Controls
-
-
- {A177A90C-8207-466A-AF70-F2B8452A42AC}
- Accord.Core
-
-
- {63184EBD-6B28-4066-AAEE-5B99431E31F4}
- Accord.IO
-
-
- {7AB4BBCC-6222-423D-9FF9-BA9CB7C09199}
- Accord.MachineLearning
-
-
- {F718E9A8-DB62-4785-8C49-4333A60D256A}
- Accord.Math
- True
-
-
- {179F3045-8757-4F4B-9508-F48327BA11E3}
- Accord.Neuro
-
-
- {FD8101DD-C95D-42D6-AD44-AE01C25F2811}
- Accord.Statistics
-
-
-
-
- False
- Microsoft .NET Framework 4 %28x86 and x64%29
- true
-
-
- False
- .NET Framework 3.5 SP1 Client Profile
- false
-
-
- False
- .NET Framework 3.5 SP1
- false
-
-
- False
- Windows Installer 3.1
- true
-
-
-
-
-
-
-
-
- Always
-
-
-
- PreserveNewest
-
-
- PreserveNewest
-
-
-
-
-
-
-
- ResXFileCodeGenerator
- Resources.Designer.cs
- Designer
-
-
-
+ -->
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Code
+
+
+ Code
+
+
+
+ True
+ True
+ Resources.resx
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {0AB27A20-925C-4556-9FA4-6E2C109E448E}
+ Accord.Controls
+
+
+ {A177A90C-8207-466A-AF70-F2B8452A42AC}
+ Accord.Core
+
+
+ {63184EBD-6B28-4066-AAEE-5B99431E31F4}
+ Accord.IO
+
+
+ {7AB4BBCC-6222-423D-9FF9-BA9CB7C09199}
+ Accord.MachineLearning
+
+
+ {F718E9A8-DB62-4785-8C49-4333A60D256A}
+ Accord.Math
+ True
+
+
+ {179F3045-8757-4F4B-9508-F48327BA11E3}
+ Accord.Neuro
+
+
+ {FD8101DD-C95D-42D6-AD44-AE01C25F2811}
+ Accord.Statistics
+
+
+
+
+ False
+ Microsoft .NET Framework 4 %28x86 and x64%29
+ true
+
+
+ False
+ .NET Framework 3.5 SP1 Client Profile
+ false
+
+
+ False
+ .NET Framework 3.5 SP1
+ false
+
+
+ False
+ Windows Installer 3.1
+ true
+
+
+
+
+
+
+
+
+ Always
+
+
+
+ PreserveNewest
+
+
+ PreserveNewest
+
+
+
+
+
+
+
+
+ ResXFileCodeGenerator
+ Resources.Designer.cs
+ Designer
+
+
+
+ -->
\ No newline at end of file
diff --git a/Unit Tests/Accord.Tests.Statistics/Analysis/LogisticRegressionAnalysisTest.cs b/Unit Tests/Accord.Tests.Statistics/Analysis/LogisticRegressionAnalysisTest.cs
index 3966c7a47..7b542b20a 100644
--- a/Unit Tests/Accord.Tests.Statistics/Analysis/LogisticRegressionAnalysisTest.cs
+++ b/Unit Tests/Accord.Tests.Statistics/Analysis/LogisticRegressionAnalysisTest.cs
@@ -56,7 +56,7 @@ public void ComputeTest1()
double[][] inputs = training.Submatrix(null, 0, 3);
double[] outputs = training.GetColumn(4);
- LogisticRegressionAnalysis regression = new LogisticRegressionAnalysis(inputs, outputs);
+ var regression = new LogisticRegressionAnalysis(inputs, outputs);
bool converged = regression.Compute();
Assert.IsTrue(converged);
@@ -93,7 +93,9 @@ public void ComputeTest2()
double[] trainOutput = { 1, 0 };
double[] testInput = { 0, 0.2 };
- LogisticRegressionAnalysis target = new LogisticRegressionAnalysis(trainInput, trainOutput);
+ LogisticRegressionAnalysis target = new LogisticRegressionAnalysis(trainInput, trainOutput);
+
+ target.Regularization = 0;
target.Compute();
@@ -177,7 +179,9 @@ public void ComputeTest4()
};
// Create a Logistic Regression analysis
- var regression = new LogisticRegressionAnalysis(inputs, output);
+ var regression = new LogisticRegressionAnalysis(inputs, output);
+
+ regression.Regularization = 0;
regression.Compute(); // compute the analysis.
diff --git a/Unit Tests/Accord.Tests.Statistics/Models/Markov/HiddenMarkovModel`1Test.cs b/Unit Tests/Accord.Tests.Statistics/Models/Markov/HiddenMarkovModel`1Test.cs
index d87cae6f4..4165ea4b3 100644
--- a/Unit Tests/Accord.Tests.Statistics/Models/Markov/HiddenMarkovModel`1Test.cs
+++ b/Unit Tests/Accord.Tests.Statistics/Models/Markov/HiddenMarkovModel`1Test.cs
@@ -1340,7 +1340,9 @@ public void LearnTest13()
[TestMethod()]
public void BigSampleLearnTest13()
- {
+ {
+ Accord.Math.Tools.SetupGenerator(0);
+
var list = new List();
for (int i = 0; i < 1000000; i++)
diff --git a/Unit Tests/Accord.Tests.Statistics/Models/Regression/LogisticRegressionTest.cs b/Unit Tests/Accord.Tests.Statistics/Models/Regression/LogisticRegressionTest.cs
index 82d73d586..2079f1c9c 100644
--- a/Unit Tests/Accord.Tests.Statistics/Models/Regression/LogisticRegressionTest.cs
+++ b/Unit Tests/Accord.Tests.Statistics/Models/Regression/LogisticRegressionTest.cs
@@ -22,6 +22,7 @@
namespace Accord.Tests.Statistics
{
+ using Accord.IO;
using Accord.Math;
using Accord.Statistics.Analysis;
using Accord.Statistics.Models.Regression;
@@ -87,6 +88,8 @@ public void ComputeTest()
// will use the Iteratively Reweighted Least Squares method.
var teacher = new IterativeReweightedLeastSquares(regression);
+ teacher.Regularization = 0;
+
// Now, we will iteratively estimate our model. The Run method returns
// the maximum relative change in the model parameters and we will use
// it as the convergence criteria.
@@ -273,6 +276,8 @@ public void ComputeTest3()
var teacher = new IterativeReweightedLeastSquares(regression);
+ teacher.Regularization = 0;
+
double delta = 0;
do
@@ -391,7 +396,7 @@ public void LargeCoefficientsTest()
var teacher = new IterativeReweightedLeastSquares(regression);
- teacher.Regularization = 1e-5;
+ teacher.Regularization = 1e-10;
var errors = new List();
for (int i = 0; i < 1000; i++)
@@ -410,10 +415,47 @@ public void LargeCoefficientsTest()
error /= output.Length;
Assert.AreEqual(error, 0);
- Assert.AreEqual(-58.817944701474687, regression.Coefficients[0]);
- Assert.AreEqual(0.13783960821658245, regression.Coefficients[1]);
- Assert.AreEqual(-1.532885090757945, regression.Coefficients[2]);
- Assert.AreEqual(7.9460105648631973, regression.Coefficients[3]);
+ Assert.AreEqual(-355.59378247276379, regression.Coefficients[0]);
+ Assert.AreEqual(1.2646432605797491, regression.Coefficients[1]);
+ Assert.AreEqual(-10.710529810144157, regression.Coefficients[2]);
+ Assert.AreEqual(44.089493151268726, regression.Coefficients[3]);
}
+
+ [TestMethod()]
+ public void RegularizationTest2()
+ {
+ CsvReader reader = CsvReader.FromText(Properties.Resources.regression, true);
+
+ double[][] data = reader.ToTable().ToArray();
+
+ double[][] inputs = data.GetColumns(0, 1);
+
+ double[] output = data.GetColumn(2);
+
+ var regression = new LogisticRegression(2);
+ var irls = new IterativeReweightedLeastSquares(regression);
+
+ double error = irls.Run(inputs, output);
+ double newError = 0;
+
+ for (int i = 0; i < 50; i++)
+ newError = irls.Run(inputs, output);
+
+ double actual = irls.ComputeError(inputs, output);
+ Assert.AreEqual(30.507262964894068, actual, 1e-8);
+
+ Assert.AreEqual(3, regression.Coefficients.Length);
+ Assert.AreEqual(-0.38409721299838279, regression.Coefficients[0]);
+ Assert.AreEqual(0.1065137931017601, regression.Coefficients[1]);
+ Assert.AreEqual(17.275849279015059, regression.Coefficients[2]);
+
+ for (int i = 0; i < 50; i++)
+ newError = irls.Run(inputs, output);
+
+ Assert.AreEqual(-0.38409721299838279, regression.Coefficients[0], 1e-8);
+ Assert.AreEqual(0.1065137931017601, regression.Coefficients[1], 1e-8);
+ Assert.AreEqual(17.275849279015059, regression.Coefficients[2], 1e-8);
+ }
+
}
}
diff --git a/Unit Tests/Accord.Tests.Statistics/Properties/Resources.Designer.cs b/Unit Tests/Accord.Tests.Statistics/Properties/Resources.Designer.cs
index 55f851634..55c6094d3 100644
--- a/Unit Tests/Accord.Tests.Statistics/Properties/Resources.Designer.cs
+++ b/Unit Tests/Accord.Tests.Statistics/Properties/Resources.Designer.cs
@@ -1,7 +1,7 @@
//------------------------------------------------------------------------------
//
// This code was generated by a tool.
-// Runtime Version:4.0.30319.18444
+// Runtime Version:4.0.30319.34209
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
@@ -70,6 +70,70 @@ internal static byte[] CircleWithWeights {
}
}
+ ///
+ /// Looks up a localized string similar to In1,In2,Out
+ ///15.3,0,1
+ ///15,0,1
+ ///3.2,1,1
+ ///7.5,0,0
+ ///10.2,0,1
+ ///10.9,0,1
+ ///14.5,0,0
+ ///5.7,0,0
+ ///5,0,1
+ ///12.5,0,1
+ ///10,0,0
+ ///7.2,0,1
+ ///13,0,1
+ ///5.7,0,0
+ ///6.8,0,1
+ ///7.81,0,0
+ ///14.3,0,0
+ ///12.3,0,1
+ ///9,0,1
+ ///5,0,1
+ ///4.8,0,0
+ ///2.79,0,1
+ ///7,0,0
+ ///8.33,0,1
+ ///14,0,1
+ ///10.3,0,1
+ ///11,0,1
+ ///6.41,0,0
+ ///6.77,0,1
+ ///7.52,0,1
+ ///5.67,0,1
+ ///6.94,0,1
+ ///6.86,0,1
+ ///4.8,0,1
+ ///6.8,0,1
+ ///4.2,0,1
+ ///6.1,0,1
+ ///5.53,0,1
+ ///12.6,0,0
+ ///13,0,1
+ ///7.2,0,0
+ ///14,0,1
+ ///12.2,0,1
+ ///7.49,0,1
+ ///10.2,0,1
+ ///16.2,0,1
+ ///7,0,0
+ ///8,0,1
+ ///4.5,0,1
+ ///5.02,0,1
+ ///6.7,0,1
+ ///16.2,0,1
+ ///13.7,0,1
+ ///10.2,0,1
+ ///5.09, [rest of string was truncated]";.
+ ///
+ internal static string regression {
+ get {
+ return ResourceManager.GetString("regression", resourceCulture);
+ }
+ }
+
///
/// Looks up a localized resource of type System.Byte[].
///
diff --git a/Unit Tests/Accord.Tests.Statistics/Properties/Resources.resx b/Unit Tests/Accord.Tests.Statistics/Properties/Resources.resx
index bd259e478..80d4b396b 100644
--- a/Unit Tests/Accord.Tests.Statistics/Properties/Resources.resx
+++ b/Unit Tests/Accord.Tests.Statistics/Properties/Resources.resx
@@ -121,6 +121,9 @@
..\Resources\CircleWithWeights.xls;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+ ..\resources\regression.csv;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;utf-8
+
..\Resources\sample.xls;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
diff --git a/Unit Tests/Accord.Tests.Statistics/Resources/regression.csv b/Unit Tests/Accord.Tests.Statistics/Resources/regression.csv
new file mode 100644
index 0000000000..683d35c97
--- /dev/null
+++ b/Unit Tests/Accord.Tests.Statistics/Resources/regression.csv
@@ -0,0 +1,146 @@
+In1,In2,Out
+15.3,0,1
+15,0,1
+3.2,1,1
+7.5,0,0
+10.2,0,1
+10.9,0,1
+14.5,0,0
+5.7,0,0
+5,0,1
+12.5,0,1
+10,0,0
+7.2,0,1
+13,0,1
+5.7,0,0
+6.8,0,1
+7.81,0,0
+14.3,0,0
+12.3,0,1
+9,0,1
+5,0,1
+4.8,0,0
+2.79,0,1
+7,0,0
+8.33,0,1
+14,0,1
+10.3,0,1
+11,0,1
+6.41,0,0
+6.77,0,1
+7.52,0,1
+5.67,0,1
+6.94,0,1
+6.86,0,1
+4.8,0,1
+6.8,0,1
+4.2,0,1
+6.1,0,1
+5.53,0,1
+12.6,0,0
+13,0,1
+7.2,0,0
+14,0,1
+12.2,0,1
+7.49,0,1
+10.2,0,1
+16.2,0,1
+7,0,0
+8,0,1
+4.5,0,1
+5.02,0,1
+6.7,0,1
+16.2,0,1
+13.7,0,1
+10.2,0,1
+5.09,0,0
+4.88,0,1
+3.11,0,0
+4.09,0,1
+16.3,0,1
+11.4,0,1
+7,0,1
+4.24,0,0
+6.89,0,1
+9.45,0,1
+12.1,0,0
+7.6,0,0
+7.42,0,0
+6.24,0,0
+4.89,1,1
+10.2,0,1
+9.5,0,1
+17.8,0,0
+3.3,1,1
+7.42,0,1
+12.3,0,1
+15.66,0,1
+4.65,0,1
+10.5,0,1
+4.1,0,0
+5.77,0,0
+4.07,0,0
+8.19,0,1
+9.8,0,0
+3.3,1,1
+11,0,1
+10.5,0,1
+8.91,0,1
+7,0,0
+7.7,1,1
+5.8,0,0
+5.7,1,1
+5.06,1,1
+15,0,1
+7.19,0,1
+3.39,0,0
+6.4,0,1
+8.16,0,0
+10.3,0,1
+3,0,0
+11.2,0,1
+12,0,1
+8.16,0,0
+9.48,0,1
+3.4,1,1
+10,0,1
+9,0,1
+11.2,0,1
+5.66,0,1
+7.25,0,1
+15,1,1
+13,1,1
+5,0,0
+8,0,0
+15,0,0
+14.6,0,0
+3.15,0,0
+9.97,0,0
+19.6,0,0
+16.8,0,1
+3.49,0,0
+3.4,0,0
+6,0,0
+6,0,0
+2.82,0,0
+5.19,0,1
+7.9,0,1
+6.35,0,0
+6.94,0,1
+5.7,0,1
+3.37,0,0
+11.8,0,0
+12.4,0,1
+5.9,0,0
+5.8,0,1
+7.4,0,0
+11.34,0,0
+14.3,0,1
+3.78,0,0
+7.96,0,0
+8.1,0,1
+7.3,0,1
+5.14,0,1
+4.8,0,1
+4.8,0,1
+2.76,0,1
\ No newline at end of file