Computing a (2D) confidence region in C# using samples from a normal distribution
The goal of this article is to show you how to create a confidence region from sample data in C#. It's assumed here that you have 2D sample data (assumed to come from a normal distribution with unknown mean and variance). We do this the "long way" using only libraries readily available in C# (assuming you don't or can't have access any specific math/science libraries).
We'll use the variable names (instead of latex or other symbols) to keep it consistent with the code. The variables used are described here.
Although it seems very awkward, the only way I could get the value of iCF(2,n-2,p) above with libraries included in Visual Studio was to use a Microsoft charting object. After creating a "Chart" object c, the inverse cumulative F distribution can be accessed via c.DataManipulator.Statistics.InverseFDistriution(2,n-2,p). In order to use this object you'll need to do the following things in Visual Studio.
We'll use the variable names (instead of latex or other symbols) to keep it consistent with the code. The variables used are described here.
- x1 and x2 are lists of n floating point values where (x1[i], x2[i]) is the i'th sample point we have available to us.
- Using above we can compute the mean of the first and second values mx1 and mx2, or just mx as a vector including both values.
- The covariance matrix of x1 and x2 is named cx12 and its inverse icx12. List extensions are used to compute these, see the source code down below.
Although it seems very awkward, the only way I could get the value of iCF(2,n-2,p) above with libraries included in Visual Studio was to use a Microsoft charting object. After creating a "Chart" object c, the inverse cumulative F distribution can be accessed via c.DataManipulator.Statistics.InverseFDistriution(2,n-2,p). In order to use this object you'll need to do the following things in Visual Studio.
- Add a reference to the assembly System.Windows.Forms.
- Add a reference to the assembly System.Windows.Forms.DataVisualization.
- Include the statement "using System.Windows.Forms.DataVisualization.Charting;" at the top of the source file.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms.DataVisualization.Charting;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
Chart c = new Chart(); // only here to access the stat functions
List<double> x1 = new List<double>() {1.348314607, 0.438116101, 1.332308936, -0.108108108, 1.122833493, 1.235955056, 0.882028666, 0.981461287, 2.696323226, 1.888888889, 1.111111111, 1.235955056, 0.87431694, 2.359550562, 0.674157303, 2.922077922, 1.234567901, 0.612244898, 1.13, 0.95, 1.05};
List<double> x2 = new List<double>() {0.786516854, 0.766703176, 0.777180213, 1.72972973, 0.224566699, 0.337078652, 0, 0.109051254, 1.01112121, 0.222222222, 0.222222222, 0.449438202, 0.327868852, 0.786516854, 0.224719101, 0.541125541, 0.561167228, 0.816326531, 0.83, 0.64, 0.71};
List<double> mx = new List<double>() { x1.Mean(), x2.Mean() };
List<double> icx12 = x1.CovarianceMatrix(x2).Invert(), meanDiff;
Console.WriteLine("cx12: "+string.Join(", ", x1.CovarianceMatrix(x2)));
Console.WriteLine("icx12: " + string.Join(", ", x1.CovarianceMatrix(x2).Invert()));
int n = x1.Count;
double probability = 0.05; //95% confidence
double intervalLimit = 2*(n - 1)*c.DataManipulator.Statistics.InverseFDistribution(probability, 2, n - 2)/(n-2);
// iterate over a grid of points and test whether each one is in the confidence region
Console.WriteLine("Of the tested points, the following were in the confidence region");
for (double testx1 = -3.0; testx1 < 3.0; testx1 += 0.05)
{
for (double testx2 = -3.0; testx2 < 3.0; testx2 += 0.05)
{
meanDiff = mx.Minus(new List<double>() {testx1, testx2});
double v = n*(meanDiff.Dot(icx12.Mult(meanDiff)));
if (v < intervalLimit)
Console.WriteLine(testx1+", "+testx2);
}
}
Console.WriteLine("Done, press a key to exit");
Console.ReadKey();
}
}
public static class ListExtensions
{
public static double Mean(this List<double> values)
{
return values.Sum()/values.Count;
}
public static List<double> Minus(this List<double> values, List<double> vals)
{
return values.Select((v, i) => v - vals[i]).ToList();
}
public static double Variance(this List<double> values)
{
return values.Select(v => Math.Pow(v - values.Mean(), 2)).Sum() / (values.Count - 1);
}
public static double Covariance(this List<double> values, List<double> vals)
{
double m0 = values.Mean(), m1 = vals.Mean();
return Enumerable.Range(0, vals.Count).Select(i => (values[i] - m0) * (vals[i] - m1)).Sum() / (values.Count - 1);
}
// returns the covariance matrix of the values to vals, as a list {e11,e12,e21,e22}
public static List<double> CovarianceMatrix(this List<double> values, List<double> vals)
{
return new List<double>() { values.Variance(), values.Covariance(vals), vals.Covariance(values), vals.Variance() };
}
public static List<double> Invert(this List<double> values)
{
double det = values[0] * values[3] - values[1] * values[2];
return new List<double>() { values[3] / det, -values[1] / det, -values[2] / det, values[0] / det };
}
public static double Dot(this List<double> v0, List<double> v1)
{
return Enumerable.Range(0, v0.Count).Select(i => v0[i] * v1[i]).Sum();
}
// matrix mult vec assuming values is 2x2 as a 4list and col is 2x1 as a 2list
public static List<double> Mult(this List<double> values, List<double> col)
{
return new List<double>() { values[0] * col[0] + values[1] * col[1], values[2] * col[0] + values[3] * col[1] };
}
}
}

Comments
Post a Comment