Hello, I've got a question for all the machine learning experts here on the forum. I have been working on a neural network template that lets users specify the number of layers and how many neurons each layer should have. I recently implemented a function that (hopefully) trains the network using backpropagation. I am not that experienced with backpropagation, but I am taking a linear algebra class and have taken calculus. I do not know whether the network actually trains, so I want a real dataset to train and test it on. The small networks I have been experimenting with have not given definitive results, which is why I want to use the MNIST dataset. However, I do not know how to get MNIST into a form I can use to train my network. My goal is to turn each 28*28 image into an array of 784 values, plus a corresponding integer array of labels. How can I do this?
import java.lang.Math.*;

int[] numNs;
Brain b;
ArrayList<NeuronDisplay[]> nDisplay;
ArrayList<Neuron[]> NeuronsToMap = new ArrayList<Neuron[]>();

void setup() {
  size(1000, 800);
  nDisplay = new ArrayList<NeuronDisplay[]>();
  int[] numNeurons = {2, 2, 1};
  //int[] numNeurons = new int[int(random(3, 10))];
  //for (int i = 0; i < numNeurons.length; i++) { numNeurons[i] = int(random(2, 10)); }
  numNs = numNeurons;
  b = new Brain(numNs);
  //float[] ex = {1, 1, 1, 1};
  //b.train(ex);
}

void draw() {
  background(#FF820D);
  strokeWeight(1);
  //pushMatrix();
  displayNet();
  //popMatrix();
  //ud();
  if (mousePressed) ud();
}
void ud() {
  // training set: the target is 1 when the two inputs are equal (XNOR)
  float[][] train = {
    {1, 0},
    {0, 1},
    {0, 0},
    {1, 1}
  };
  float[] corresponding = {0, 0, 1, 1};
  // pick a random example and load it into the input layer
  Neuron[] n = new Neuron[numNs[0]];
  int which = int(random(train.length));
  for (int i = 0; i < n.length; i++) {
    n[i] = new Neuron(train[which][i]);
  }
  b.updateInputs(n);
  b.update();
  //for (int i = 0; i < b.connections.size(); i++) {
  //  Connection[][] holder = b.connections.get(i);
  //  for (int j = 0; j < holder.length; j++) {
  //    for (int k = 0; k < holder[j].length; k++) {
  //      println("which: " + i + " [" + j + "][" + k + "] " + holder[j][k].weight);
  //    }
  //  }
  //}
  //for (int i = 0; i < b.netLayers.size(); i++) {
  //  println();
  //  for (int j = 0; j < b.netLayers.get(i).length; j++) {
  //    println("| Layer: " + i + " | Neuron : " + j + " | Value: " + b.netLayers.get(i)[j].output);
  //  }
  //}
  // backpropagate against the expected output for this example
  float[] corr = new float[numNs[numNs.length - 1]];
  corr[0] = corresponding[which];
  b.train(corr);
}
class Brain {
  int[] layerSizes;
  ArrayList<Neuron[]> netLayers = new ArrayList<Neuron[]>();
  ArrayList<Connection[][]> connections = new ArrayList<Connection[][]>();

  public Brain(int[] layers) {
    layerSizes = layers;
    // allocate one Neuron[] per layer and fill it with zeroed neurons
    for (int i = 0; i < layers.length; i++) {
      netLayers.add(new Neuron[layers[i]]);
    }
    for (int i = 0; i < netLayers.size(); i++) {
      for (int j = 0; j < netLayers.get(i).length; j++) {
        netLayers.get(i)[j] = new Neuron(0);
      }
    }
    NeuronsToMap = netLayers;
    // seed the input layer with random values so the first forward pass has data
    Neuron[] inputs = new Neuron[layers[0]];
    println(inputs.length);
    for (int i = 0; i < inputs.length; i++) {
      inputs[i] = new Neuron(random(0, 1));
    }
    updateInputs(inputs);
    // one weight matrix between each pair of adjacent layers
    for (int i = 1; i < layers.length; i++) {
      connections.add(randomWeights(layers[i - 1], layers[i]));
    }
    // debug output: inputs, then weights, then activations
    for (int i = 0; i < netLayers.size(); i++) {
      println();
      for (int j = 0; j < netLayers.get(i).length; j++) {
        println(netLayers.get(i)[j].input);
      }
      println("___");
    }
    update();
    for (int i = 0; i < connections.size(); i++) {
      Connection[][] holder = connections.get(i);
      for (int j = 0; j < holder.length; j++) {
        for (int k = 0; k < holder[j].length; k++) {
          println("which: " + i + " [" + j + "][" + k + "] " + holder[j][k].weight);
        }
      }
    }
    for (int i = 0; i < netLayers.size(); i++) {
      println();
      for (int j = 0; j < netLayers.get(i).length; j++) {
        println("| Layer: " + i + " | Neuron : " + j + " | Value: " + netLayers.get(i)[j].output);
      }
    }
    // build the display objects for each neuron
    for (int k = 0; k < netLayers.size(); k++) {
      NeuronDisplay[] ns = new NeuronDisplay[netLayers.get(k).length];
      for (int j = 0; j < ns.length; j++) {
        ns[j] = new NeuronDisplay((k * 100 + 100), (j * 60 + 100), k, j);
      }
      nDisplay.add(ns);
    }
  }
  public Connection[][] randomWeights(int r, int c) {
    Connection[][] ret = new Connection[r][c];
    for (int i = 0; i < r; i++) {
      for (int j = 0; j < c; j++) {
        ret[i][j] = new Connection(random(-1, 1));
      }
    }
    return ret;
  }

  void updateWeights(int which, Connection[][] change) {
    connections.set(which, change);
  }

  Neuron[] retInputs() {
    return netLayers.get(0);
  }

  Neuron[] retOutputs() {
    return netLayers.get(netLayers.size() - 1);
  }

  int[] retLayerNums() {
    return layerSizes;
  }

  void updateInputs(Neuron[] in) {
    netLayers.set(0, in);
  }
  // forward pass: weighted sums flow layer by layer, each followed by activation
  void update() {
    for (int i = 0; i < netLayers.size() - 1; i++) {
      Neuron[] layer1 = netLayers.get(i);
      Neuron[] nextLayer = new Neuron[netLayers.get(i + 1).length];
      Connection[][] conns = connections.get(i);
      for (int j = 0; j < netLayers.get(i + 1).length; j++) {
        nextLayer[j] = new Neuron(0);
        for (int k = 0; k < layer1.length; k++) {
          nextLayer[j].input += layer1[k].output * conns[k][j].weight;
        }
        nextLayer[j].activate();
      }
      netLayers.set(i + 1, nextLayer);
    }
    NeuronsToMap = netLayers;
  }
  void train(float[] expect) {
    float[] expected = expect;
    float learningRate = .1;
    // backward pass: compute dC/dA for every neuron, starting at the output layer
    for (int l = netLayers.size() - 1; l >= 0; l--) {
      Neuron[] outputNeurons = netLayers.get(l);
      for (int j = 0; j < netLayers.get(l).length; j++) {
        if (l >= netLayers.size() - 1) {
          // output layer: derivative of the squared error
          netLayers.get(l)[j].dC_dAj = 2 * (outputNeurons[j].output - expected[j]);
        } else {
          // hidden layers: chain rule back through the next layer's sigmoid
          for (int p = 0; p < netLayers.get(l + 1).length; p++) {
            float sigP = 1 / (1 + exp(-(netLayers.get(l + 1)[p].input)));
            netLayers.get(l)[j].dC_dAj += connections.get(l)[j][p].weight * sigP * (1 - sigP) * netLayers.get(l + 1)[p].dC_dAj;
          }
        }
      }
    }
    // gradient for each weight: previous activation * sigmoid' * dC/dA of the target neuron
    for (int n = 0; n < netLayers.size() - 1; n++) {
      println(netLayers.size(), " ", n);
      Connection[][] layerN = connections.get(n);
      Neuron[] previous = netLayers.get(n);
      for (int i = 0; i < layerN.length; i++) {
        for (int j = 0; j < layerN[i].length; j++) {
          float sigJ = 1 / (1 + exp(-(netLayers.get(n + 1)[j].input)));
          connections.get(n)[i][j].updateSensitivity(previous[i].output * sigJ * (1 - sigJ) * netLayers.get(n + 1)[j].dC_dAj);
        }
      }
    }
    // gradient-descent step: w = w - learningRate * dC/dw
    for (int i = 0; i < connections.size(); i++) {
      Connection[][] ret = new Connection[connections.get(i).length][connections.get(i)[0].length];
      for (int j = 0; j < connections.get(i).length; j++) {
        for (int k = 0; k < connections.get(i)[j].length; k++) {
          float newWeight = connections.get(i)[j][k].weight - learningRate * connections.get(i)[j][k].errorSensitivity;
          ret[j][k] = new Connection(newWeight);
        }
      }
      connections.set(i, ret);
    }
  } // end train()
}
class Connection {
  float errorSensitivity;
  float weight;
  float learningRate = .01;
  float fill;

  public Connection(float weight) {
    this.weight = weight;
    this.errorSensitivity = 0;
    fill = map(weight, -1, 1, 0, 255);  // gray level used when drawing this connection
  }

  // note: not called anywhere at the moment; it just re-randomizes the weight
  void updateWeight() {
    //weight = weight - learningRate*(errorSensitivity);
    weight = random(-1, 1);
    fill = map(weight, -1, 1, 0, 255);
  }

  void updateSensitivity(float in) {
    errorSensitivity = in;
  }
}
class Neuron {
  public float input = -10;  // weighted input into the neuron
  float output = -10;        // activated output
  float bias;
  float fill = 255;
  float dC_dAj = 0;          // dC/dA for this neuron, set during backpropagation

  public Neuron(float input) {
    this.input = input;
    this.output = input;  // passes straight through until activate() is called
    bias = 0;
    fill = int(map(input, 0, 1, 0, 255));
  }

  void updateInput(float in) {
    input = in;
  }

  void updateBias(float in) {
    bias = in;
  }

  void activate() {
    //output = (float)Math.tanh(input);  // alternative activation
    // logistic sigmoid: output = 1 / (1 + e^(-(input - bias)))
    output = 1 / (1 + exp(-(input - bias)));
    println(output);  // debug
    fill = int(map(output, 0, 1, 0, 255));
  }

  void isInput() {
    output = input;
  }

  void carry() {
    output = input;
  }
}
class NeuronDisplay {
  int x, y;
  float size = 30;
  int layer, position;
  int fill;

  public NeuronDisplay(int x, int y, int whichLayer, int whichNeuron) {
    this.x = x;
    this.y = y;
    this.layer = whichLayer;
    this.position = whichNeuron;
  }

  void display() {
    println("netLayers size: " + b.netLayers.size(), " ", layer, " ", b.netLayers.get(layer).length + " " + position);
    if (layer < b.netLayers.size() && position < b.netLayers.get(layer).length) fill(b.netLayers.get(layer)[position].fill);
    else fill(255, 0, 0);
    ellipse(x, y, size, size);
  }
}
void displayNet() {
  // draw the connections first so the neurons sit on top of them
  for (int i = 0; i < nDisplay.size() - 1; i++) {
    NeuronDisplay[] hold = nDisplay.get(i);
    NeuronDisplay[] next = nDisplay.get(i + 1);
    for (int j = 0; j < hold.length; j++) {
      for (int k = 0; k < next.length; k++) {
        stroke(b.connections.get(i)[j][k].fill);
        line(hold[j].x, hold[j].y, next[k].x, next[k].y);
      }
    }
  }
  for (int i = 0; i < nDisplay.size(); i++) {
    for (int j = 0; j < nDisplay.get(i).length; j++) {
      stroke(255);
      nDisplay.get(i)[j].display();
    }
  }
}
Answers
Your code is formatted kind of weirdly, so it's hard for others to help. Also, since it is such a massive amount of code, don't expect anyone to search through it; it's pretty overwhelming. What I did when using the MNIST dataset in Python was feed each pixel's data (its color level) into a one-dimensional array, one element per pixel, and just feed that into the neural network. I had about 95-98% success just using this method, IIRC.
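In Processing, that flattening step might look something like this (a minimal sketch, assuming a 28x28 grayscale PImage, e.g. one loaded with loadImage(); imageToInputs() is just a hypothetical helper name):

// flatten a 28x28 grayscale PImage into a 784-value input array
float[] imageToInputs(PImage img) {
  img.loadPixels();
  float[] inputs = new float[img.pixels.length];  // 28*28 = 784 for MNIST
  for (int i = 0; i < img.pixels.length; i++) {
    // brightness() returns 0-255 in the default color mode; scale to 0-1
    inputs[i] = brightness(img.pixels[i]) / 255.0;
  }
  return inputs;
}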
The MNIST website gives the data in the .gz file format; how can I convert this into a PNG or JPG that will be easier to process? P.S. Fixed the formatting. It's been a while since I've been here, haha. Also, thanks for the quick reply.
EDIT: The code is a mess and formatted in a non-standard way, since this is just a project I have been working on in my spare time. I can explain any part of it if you comment. I mainly included the code so that others could put it in their own editors and test it out. I don't really have a problem with the code itself, since it appears to work, but I am not 100% sure that the backpropagation equations are right or that the training method really works (the train() method in the Brain class).
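One quick way to sanity-check the sigmoid-derivative term that train() relies on is a finite-difference comparison. A minimal standalone sketch (sig() below is the same logistic function used in activate(), with the bias omitted):

// check that sig'(x) = sig(x) * (1 - sig(x)) matches the numerical slope
float sig(float x) {
  return 1 / (1 + exp(-x));
}

void setup() {
  float x = 0.7;
  float h = 0.001;
  float analytic = sig(x) * (1 - sig(x));              // the derivative used in train()
  float numeric = (sig(x + h) - sig(x - h)) / (2 * h); // central difference
  println("analytic: " + analytic + "  numeric: " + numeric);  // these should agree closely
}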
Oh right, my bad. I got them from here: https://pjreddie.com/projects/mnist-in-csv/. They're in CSV format.
Thanks! I will use that.
For anyone reading this in the future, here is an example of using one of the files in Processing (the test file in this case; put it in your Processing sketch):
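A sketch along those lines (assuming mnist_test.csv from the link above sits in the sketch's data folder; each row is a label followed by 784 pixel values from 0 to 255):

// load the MNIST test set from mnist_test.csv into label + pixel arrays
float[][] pixels;  // one 784-element array per image, scaled to 0-1
int[] labels;      // the digit each image represents

void setup() {
  size(280, 280);
  String[] rows = loadStrings("mnist_test.csv");
  pixels = new float[rows.length][784];
  labels = new int[rows.length];
  for (int i = 0; i < rows.length; i++) {
    int[] vals = int(split(rows[i], ','));
    labels[i] = vals[0];                  // first value in the row is the label
    for (int j = 0; j < 784; j++) {
      pixels[i][j] = vals[j + 1] / 255.0; // remaining 784 values are the pixels
    }
  }
  // sanity check: draw the first image scaled up 10x and print its label
  noStroke();
  for (int j = 0; j < 784; j++) {
    fill(pixels[0][j] * 255);
    rect((j % 28) * 10, (j / 28) * 10, 10, 10);
  }
  println("first label: " + labels[0]);
}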