Backpropagation problems (Neural network)

edited November 2017 in Questions about Code

Hello all, I am working on creating a neural network template (users can determine # of layers and # neurons/layer) and I am currently using the MNIST database to test it. There is one thing wrong, however. The last set of connections (the ones that lead to the output) have consistently all approached a value of 0.5, making all the calculations made in the previous layers moot. This problem leads to every part of the network working except for the last set of connections and output neurons. Since the equation for error sensitivity is different for the last set of connections, I assumed that was the problem, but I have checked it and I think that it should work correctly. The code is a bit messy, so feel free to ask me to clarify what any section of the code means. I have a feeling the problem is in the Train method (lines 257-310). The program is intensive, so if you run it to test it (which I highly recommend) I would use a capable computer. Macbook Airs simply do not cut it, unfortunately. Also, use the MNIST database found at https://pjreddie.com/projects/mnist-in-csv/ (thanks, Dinoswarleafs).

import java.lang.Math.*;

int[] numNs;
Brain b;
ArrayList<NeuronDisplay[]> nDisplay;
ArrayList<Neuron[]> NeuronsToMap = new ArrayList<Neuron[]>();

Table trainReader;
Table testReader;
//TableRow csvRow;
int counter = 0;


TableRow csvRow;


int pixelSize = 6;

float[][] expectedValues;

float[][] trainingData;

void setup() {
  size(1000, 800);
  nDisplay = new ArrayList<NeuronDisplay[]>();
  int[] numNeurons = {784, 26, 16, 10};
  trainReader = loadTable("mnist_train.csv");
  testReader = loadTable("mnist_test.csv");


  //int[] numNeurons = new int[int(random(3, 10))];
  //for(int i = 0; i<numNeurons.length; i++){ numNeurons[i] = int(random(2, 10));}

  numNs = numNeurons;
  b = new Brain(numNs);

  float[][] ex = new float[trainReader.getRowCount()][10];
  for (int i = 0; i< trainReader.getRowCount(); i++) {
    for (int j = 0; j< 10; j++) {
      if (j == trainReader.getInt(i, 0)) ex[i][j] =1;
      else ex[i][j] = 0;
    }
  }
  expectedValues = ex;

  float[][] train = new float[trainReader.getRowCount()][trainReader.getColumnCount()-1];
  for (int i = 0; i< train.length; i++) {
    for (int j = 1; j< train[0].length; j++) {
      train[i][j] = trainReader.getInt(i, j);
    }
  }

  trainingData = train;
  // b.train(ex);
}


void draw() {
  background(#FF820D);
  strokeWeight(1);
  //pushMatrix();
  displayNet();
  // popMatrix();
  //ud();
  //  if (mousePressed) ud();
  if (counter < 60000) {
    ud(counter);
    counter ++;
  } else println("-----~~~~~DONE~~~~~-----");

  if (counter < 60000) {
    csvRow = trainReader.getRow(counter);
    pushMatrix();
    for (int i = 0; i < trainReader.getColumnCount(); i++) {
      fill(csvRow.getInt(i));
      noStroke();
      rectMode(CENTER);

      rect(pixelSize * (i % 28) + 600, pixelSize * (i / 28) + 200, pixelSize, pixelSize);
      textAlign(CENTER);
      stroke(255);
      textSize(20);
      fill(255);
      text(trainReader.getInt(counter, 0), 650, 400);
    }
    popMatrix();
  }
} 

//void mouseClicked(){
// ud(); 
//}

void ud(int wh) {


  float[][] corresponding = expectedValues;

  Neuron[] n = new Neuron[numNs[0]];

  //  int which = int(random(trainingData.length));
  int which = wh;

  for (int i = 0; i< n.length; i++) {
    n[i] = new Neuron(trainingData[which][i]);
  }

  b.updateInputs(n);
  b.update();


  float[] corr = new float[numNs[numNs.length-1]];

  corr = corresponding[which];

  b.train(corr);
  println("done");
}

class Brain {

  int[] layerSizes;
  ArrayList<Neuron[]> netLayers = new ArrayList<Neuron[]>();
  ArrayList<Connection[][]> connections = new ArrayList<Connection[][]>();

  public Brain(int[] layers) {

    layerSizes = layers;

    for (int i = 0; i< layers.length; i ++) {
      // println(layers[i]);
      netLayers.add(new Neuron[layers[i]]);
    }

    for (int i = 0; i< netLayers.size(); i++) {
      // NeuronDisplay[] ns = new NeuronDisplay[netLayers.get(i).length];

      for (int j = 0; j< netLayers.get(i).length; j++) {
        //ns[j] = new NeuronDisplay((i * 100 + 100), (j*60 + 100), i, j);
        netLayers.get(i)[j]= new Neuron(0);
      }
      //  nDisplay.add(ns);
    }

    NeuronsToMap = netLayers;

    Neuron[] inputs = new Neuron[layers[0]];    
    //  println(inputs.length);

    for (int i = 0; i< inputs.length; i++) {
      inputs[i] = new Neuron(random(0, 1));
    }

    updateInputs(inputs);

    for (int i = 1; i< layers.length; i++) {
      connections.add(randomWeights(layers[i-1], layers[i]));
    }

    for (int i = 0; i< netLayers.size(); i++) {
      // println();
      // printArray(netLayers.get(i));
      for (int j = 0; j< netLayers.get(i).length; j++) {
        //   println(netLayers.get(i)[j].input);
      }

      // println("___");
    }

    update();

    for (int i = 0; i< connections.size(); i++) {
      Connection[][] holder = connections.get(i);     
      for (int j = 0; j< holder.length; j++) {
        for (int k = 0; k< holder[j].length; k++) {
          //     println("which: " + i + " ["+ j + "][" + k + "] " + holder[j][k].weight);
        }
      }
    } 

    for (int i = 0; i< netLayers.size(); i++) {
      //   println();
      for (int j = 0; j< netLayers.get(i).length; j++) {
        //    println("| Layer: " + i + " | Neuron : " + j + " | Value: " + netLayers.get(i)[j].output);
      }
      // printArray(netLayers.get(i));
    }

    for (int k = 0; k< netLayers.size(); k++) {
      NeuronDisplay[] ns = new NeuronDisplay[netLayers.get(k).length];

      for (int j = 0; j< ns.length; j++) {
        ns[j] = new NeuronDisplay((k * 100 + 100), (j*60 + 100), k, j);
        //  netLayers.get(i)[j]= new Neuron();
      }

      nDisplay.add(ns);
    }
  }

  public Connection[][] randomWeights(int r, int c) {

    Connection[][] ret = new Connection[r][c];

    for (int i = 0; i< r; i++) {
      for (int j =0; j< c; j++) {
        ret[i][j] = new Connection(random(-1, 1));
      }
    }

    return ret;
  }

  void updateWeights(int which, Connection[][] change) {    
    connections.set(which, change);
  }

  Neuron[] retInputs() {
    return netLayers.get(0);
  }

  Neuron[] retOutputs() {
    return netLayers.get(netLayers.size()-1);
  }

  int[] retLayerNums() {
    return layerSizes;
  }

  void updateInputs(Neuron[] in) {   
    netLayers.set(0, in);
  }

  void update() {

    for (int i = 0; i< netLayers.size()-1; i++) {
      //  float[] ret = new float[netLayers.get(i+1).length];
      Neuron[] layer1 = netLayers.get(i);
      Neuron[] nextLayer = new Neuron[netLayers.get(i+1).length];

      Connection[][] conns = connections.get(i);

      for (int j = 0; j< netLayers.get(i+1).length; j++) {
        nextLayer[j] = new Neuron(0);
        for (int k = 0; k< layer1.length; k++) {
          nextLayer[j].input += layer1[k].output * conns[k][j].weight;
        }
        nextLayer[j].activate();
        //nextLayer[j].carry();
        // else nextLayer[j].activate();
      }
      netLayers.set(i+1, nextLayer);
    }
    NeuronsToMap = netLayers;
  }  

  void train(float[] expect) {

    //float[] errors = new float[netLayers.get(netLayers.size()-1).length];
    float[] expected = expect;

    //float[][] errorSensitivities = ;
    // float totalError = 0;
    float learningRate = .1;

    for (int l = netLayers.size()-1; l>= 0; l--) { 
      Neuron[] outputNeurons = netLayers.get(l);      
      for (int j = 0; j< netLayers.get(l).length; j++) {
        if (l >= netLayers.size() -1) {
          netLayers.get(l)[j].dC_dAj = 2*(outputNeurons[j].output - expected[j]);
        } else {
          for (int p = 0; p< netLayers.get(l+1).length; p++) {
            //  println(netLayers.size(), " ", l, " ", netLayers.get(l).length, " ", j, "  ", p);
            netLayers.get(l)[j].dC_dAj += connections.get(l)[j][p].weight * ((1/(1+ exp(-(netLayers.get(l+1)[p].input))))
              *(1-(1/(1+ exp(-(netLayers.get(l+1)[p].input)))))) * netLayers.get(l+1)[p].dC_dAj;
          }
        }
      }
    }

    for (int n = 0; n<netLayers.size()-1; n++) {
      //   println(netLayers.size(), " ", n);
      //   Neuron[] outputNeurons = netLayers.get(n+1);
      Connection[][] layerN = connections.get(n);  
      Neuron[] previous  = netLayers.get(n);

      for (int i = 0; i<layerN.length; i++) {
        for (int j = 0; j< layerN[i].length; j++) { 
          //println(netLayers.size(), " ", outC.length, " ", outC[i].length, " ", n, " ", i, " ", j);
          //connections.get(n)[i][j].updateSensitivity(1);
          connections.get(n)[i][j].updateSensitivity(previous[i].output * ((1/(1+ exp(-(netLayers.get(n+1)[j].input)))) *
            (1-(1/(1+ exp(-(netLayers.get(n+1)[j].input)))))) * netLayers.get(n+1)[j].dC_dAj);
        }
      }
    }

    for (int i = 0; i< connections.size(); i++) {
      Connection[][] ret = new Connection[connections.get(i).length][connections.get(i)[0].length];
      for (int j = 0; j< connections.get(i).length; j++) {
        for (int k = 0; k< connections.get(i)[j].length; k++) {
          float newWeight = connections.get(i)[j][k].weight - learningRate * connections.get(i)[j][k].errorSensitivity * connections.get(i)[j][k].weight;
          //println( connections.get(i)[j][k].errorSensitivity);

          ret[j][k]= new Connection(newWeight);
        }
      } 
      connections.set(i, ret);
    }
  } // end train()
}

class Connection {

  float errorSensitivity;
  float weight;
  float learningRate = .01;
  float fill;

  public Connection(float weight) {
    this.weight = weight;
    this.errorSensitivity = 0;
    fill = map(weight, -1, 1, 0, 255);
  }

  void updateWeight() {
    // weight = weight - learningRate*(errorSensitivity);
    weight = random(-1, 1);
    fill = map(weight, -1, 1, 0, 255);
  }

  void updateSensitivity(float in) {

    errorSensitivity = in;
  }
}

class Neuron {

  public float input = -10; // weighted input into neuron
  float output = -10; //activated output
  float bias;
  float fill = 255;
  float dC_dAj = 0;

  //public Neuron() {
  //  input = 0;
  //  output = 0;
  //  bias = 0;
  //  fill = int(map(input, 0, 1, 0, 255));

  //}

  public Neuron(float input) {
    this.input = input;
    this.output = input;
    bias = 0;
    fill = int(map(input, 0, 1, 0, 255));
  }

  void updateInput(float in) {
    input = in;
  }

  void updateBias(float in) {
    bias = in;
  }

  void activate() {        
    // output = (float)Math.tanh(input);    
    // output = 1/(1+e^(-input))
    output = 1/(1+ exp(-(input-bias)));
    //    println(output);
    fill = int(map(output, 0, 1, 0, 255));
  }

  void isInput() {
    output = input;
  }
  void carry() {
    output = input;
  }
}

class NeuronDisplay {

  int x, y;
  float size = 30;
  int layer, position;
  int fill;

  public NeuronDisplay(int x, int y, int whichLayer, int whichNeuron) {   
    this.x = x;
    this.y = y;
    this.layer = whichLayer;
    this.position = whichNeuron;
  }

  void display() {
    // println("netLayers size: " + b.netLayers.size(), " ", layer, " ", b.netLayers.get(layer).length + " " + position);
    if (layer<b.netLayers.size() && position < b.netLayers.get(layer).length)  fill(b.netLayers.get(layer)[position].fill);
    else fill(255, 0, 0);
    ellipse(x, y, size, size);
  }
}

void displayNet() {

  for (int i = 0; i< nDisplay.size()-1; i++) {
    NeuronDisplay[] hold = nDisplay.get(i);
    NeuronDisplay[] next = nDisplay.get(i+1);

    for (int j = 0; j< hold.length; j++) {
      for (int k = 0; k<next.length; k++) {
        stroke(b.connections.get(i)[j][k].fill);
        line(hold[j].x, hold[j].y, next[k].x, next[k].y);
      }
    }
  }

  for (int i = 0; i< nDisplay.size(); i++) {
    //  NeuronDisplay[] hold = nDisplay.get(i);
    for (int j = 0; j< nDisplay.get(i).length; j++) {
      stroke(255);
      nDisplay.get(i)[j].display();
    }
  }
}
Sign In or Register to comment.