Simple Neural Network in C#

The goal of this article is to allow you to quickly get a simple neural network (NN) running in C#. Code is provided below, including a runnable example. A brief overview of what the code does is included here; for more details see here (lecture 10 specifically).

Neural Net Summary

This NN tries to learn a function which outputs one real number given a list of Q real numbers as input. To learn this function, the network must first be given several training points, each a list of Q real-number inputs paired with one real-number output. With each training point provided, the NN automatically adjusts its internal parameters to make the model function better fit the sample data points.
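For example, with Q = 2 a training set of two points might look like the following (a minimal sketch with made-up values; the array names x and y match the ones used in the code further below):

    // two training points, each a list of Q = 2 inputs paired with one output
    double[][] x = { new double[] { 0.5, -1.2 }, new double[] { 3.0, 2.1 } };
    double[] y = { -1D, 1D }; // one real-valued output per training point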

Operation of this NN

The NN is organized into L "layers," with each layer l containing D(l) "nodes." The first layer contains Q+1 nodes corresponding to the Q input values plus the constant 1. The second layer contains a possibly different number of nodes, each of which receives a linear combination C of values from the first layer as input and outputs F(C) to the next (third) layer. The coefficients of these linear combinations are the parameters of the NN that are learned, and F is any real-valued function provided by the user (e.g. tanh in the example below). Each node in the third layer receives a linear combination of output values from the second layer and passes that result through F to compute its output. This continues iteratively until the last layer. The output of the single node on the last layer L (i.e. the result of applying F to the linear combination of outputs from layer L-1) is the function estimate.
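Concretely, the value that a single node j in layer l passes on is computed like this (a sketch using the same names — w, xnn, s, D, and NodeFunc — that appear in the source code below):

    // linear combination C of the previous layer's outputs; the last entry of
    // xnn[l-1] is the constant 1, so its weight plays the role of a bias term
    s[l][j] = 0D;
    for (int i = 0; i < D[l - 1] + 1; i++)
        s[l][j] += w[l - 1][i, j]*xnn[l - 1][i];
    xnn[l][j] = NodeFunc(s[l][j]); // the node's output F(C)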

Source Code Including a Simple Example

The source code is below. The neural network is in the class NeuralNetwork. There is also a class TestNeuralNet which creates 300 random training points and uses them to train a neural net with 2 inputs, 2 layers, and 3 nodes (including the constant node) at each internal layer. It uses tanh as the function F at each node, so the output values lie in the range -1 to +1 (exclusive). The function it is trying to learn is +1 when both input values are positive and -1 otherwise. After creating and training the network, it tests the network on a grid of points from -15 to +15. You can run it and see how well it does! It writes the points whose network output is above 0 to a file, so those points should roughly fill the square from 0 to 15 in both coordinates (you can open the file in a spreadsheet application and plot one value against the other to check).

using System;
using System.IO;
using System.Linq;

namespace NeuralNet
{
    class NeuralNetwork
    {
        private double[][,] w;  // weights: w[layer][fromNode, toNode]
        private double[][] xnn; // node outputs: xnn[layer][node]; the last entry of each non-final layer is the constant 1
        private double[][] s;   // linear combinations fed into each node: s[layer][node]
        private double[][] ds;  // back-propagation deltas: ds[layer][node]
        private int L;  // number of layers
        private int[] D; // number of nodes at each layer, not counting the constant/1 node
        private Func<double, double> NodeFunc;  // the node function F
        private Func<double, double> NodeDeriv; // derivative of F

        public NeuralNetwork(int nivars, int nlayers, int depth, Func<double, double> f, Func<double, double> df)
        {
            L = nlayers;
            D = new int[L + 1];
            D[0] = nivars;
            for (int l = 1; l < L; l++) D[l] = depth;
            D[L] = 1;
            w = new double[L][,]; // one weight matrix per layer
            s = new double[L+1][];
            xnn = new double[L+1][];
            ds = new double[L+1][];
            Random random = new Random();
            for (int l = 0; l <= L; l++)
            {
                if (l < L)
                {
                    w[l] = new double[D[l]+1, D[l + 1]];
                    for (int i = 0; i < D[l]+1; i++)
                        for (int j = 0; j < D[l + 1]; j++)
                            w[l][i, j] = random.NextDouble(); // random initial weights
                }
                xnn[l] = (l < L) ? new double[D[l] + 1] : new double[D[l]];
                if (l < L) xnn[l][D[l]] = 1D;
                s[l] = new double[D[l]];
                ds[l] = new double[D[l]];
            }
            NodeFunc = f;
            NodeDeriv = df;
        }

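        // Train the network by repeatedly picking a random training point and taking
        // one gradient-descent step on it (stochastic gradient descent). The number of
        // steps is nSteps, or 15 passes' worth of the data set if nSteps is smaller;
        // eta is the learning rate used for each weight update.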
        public void Train(double[][] x, double[] y, int nSteps=0, double eta=.1)
        {
            int N = y.Count(), n;
            int niter = Math.Max(N*15, nSteps);
            Random random = new Random();
            for (int iter = 0; iter < niter; iter++)
            {
                n = random.Next(0, N);
                AddTrainingPoint(x[n], y[n], niters:1, eta:eta);
            }
        }

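        // Perform niters gradient-descent updates using a single training point:
        // run the point forward through the network (Eval), back-propagate the error
        // derivatives (computeDs), then move each weight opposite its gradient,
        // w[l][i,j] -= eta * x_i * delta_j, where x_i is the output of node i in
        // layer l and delta_j is the delta of node j in layer l+1.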
        public void AddTrainingPoint(double[] x, double y, int niters=6, double eta=.1)
        {
            for (int iter = 0; iter < niters; iter++)
            {
                Eval(x);
                computeDs(y);
                for (int l = 0; l < L; l++)
                    for (int i = 0; i < D[l] + 1; i++)
                        for (int j = 0; j < D[l + 1]; j++)
                            w[l][i, j] -= eta*xnn[l][i]*ds[l + 1][j];
            }
        }

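        // Append the current weights as one comma-separated line to a CSV file
        // (handy for checking whether the weights are converging between runs).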
        public void AppendWeightsToFile(string fname = @"d:\weights.csv")
        {
            string s = "";
            for (int l = 0; l < L; l++)
                for (int i = 0; i < D[l] + 1; i++)
                    for (int j = 0; j < D[l + 1]; j++)
                        s += w[l][i, j]+",";
            File.AppendAllText(fname, s + Environment.NewLine);
        }

        // Computes the back-propagation "deltas" (the ds values) for every node:
        // the output delta comes from the derivative of the squared error, and each
        // earlier delta is the weighted sum of the next layer's deltas times F'.
        // These "ds" are the "deltas" from Yaser's lecture 10.
        private void computeDs(double y)
        {
            ds[L][0] = 2*(xnn[L][0] - y)*NodeDeriv(s[L][0]);
            for (int l = L - 1; l > 0; l--)
            {
                for (int i = 0; i < D[l]; i++)
                {
                    ds[l][i] = 0D;
                    for (int j = 0; j < D[l + 1]; j++) ds[l][i] += w[l][i, j]*ds[l + 1][j];
                    ds[l][i] *= NodeDeriv(s[l][i]);
                }
            }
        }

        // Feed the data through the network and return what comes out of the last layer
        public double Eval(double[] x)
        {
            for (int i = 0; i < D[0]; i++) xnn[0][i] = x[i];
            for (int l = 1; l <= L; l++)
            {
                for (int j = 0; j < D[l]; j++)
                {
                    s[l][j] = 0D;
                    for (int i = 0; i < D[l - 1]+1; i++) s[l][j] += w[l - 1][i, j]*xnn[l - 1][i];
                    xnn[l][j] = NodeFunc(s[l][j]);
                }
            }
            return xnn[L][0];
        }
    }


    class TestNeuralNet
    {
        // this will be used as the function F at each node
        static double tanh(double x)
        {
            return Math.Tanh(x);
        }

        // derivative of the function at each node
        static double dtanh(double x)
        {
            return 1D - Math.Pow(tanh(x), 2);
        }

        static void Main(string[] args)
        {
            Random random = new Random();
            int nSamplePts = 300; // number of sample points used to train the neural net
            double[][] x = new double[nSamplePts][];
            double[] y = new double[nSamplePts];

            for (int i = 0; i < nSamplePts; i++)
            { // create the sample data points; the target function returns 1 if both values are > 0, -1 otherwise
                x[i] = new double[] {random.NextDouble()*30-15, random.NextDouble()*30-15};
                y[i] = (x[i][0] > 0D && x[i][1] > 0D) ? 1D : -1D;
            }
            
            NeuralNetwork nn = new NeuralNetwork(2, 2, 2, tanh, dtanh); // set up the neural net
            nn.Train(x, y, 6000);
            nn.AppendWeightsToFile();

            // test the trained network on a grid of points; points that evaluate above 0 are appended to tdata.csv
            for (double tx = -15D; tx <= 15; tx += .25)
                for (double ty = -15D; ty <= 15; ty += .25)
                     if (nn.Eval(new double[]{tx,ty}) > 0) File.AppendAllText(@"d:\tdata.csv",  tx +","+ ty+Environment.NewLine);

        }
    }
}
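
Because F and its derivative are supplied by the caller, you can swap in a different node function without touching the NeuralNetwork class. Below is a minimal sketch (not part of the original program) using the logistic sigmoid instead of tanh; note that its outputs lie in (0, 1), so the target values in the training data would need to be rescaled to match.

        // Logistic sigmoid and its derivative; these could sit alongside tanh/dtanh
        // in TestNeuralNet and be passed to the NeuralNetwork constructor instead.
        static double sigmoid(double x)
        {
            return 1D/(1D + Math.Exp(-x)); // outputs in (0, 1)
        }

        static double dsigmoid(double x)
        {
            double sig = 1D/(1D + Math.Exp(-x));
            return sig*(1D - sig);
        }

        // e.g. NeuralNetwork nn = new NeuralNetwork(2, 2, 2, sigmoid, dsigmoid);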
