Нейронная сеть: реализация алгоритма обратного распространения ошибки на C++ даёт неправильный результат

Question

Нейронная сеть: реализация алгоритма обратного распространения ошибки на C++ даёт неправильный результат

Я работаю над реализацией алгоритма обратного распространения ошибки (BP) и взял за основу код из этого блога ( Вот ), внеся в него некоторые изменения.

Проблема в том, что для всех шаблонов, которые я хочу протестировать, я получаю ошибку, равную 1, что, конечно, неправильно.

Дополнительно я изменил example.cpp: теперь программа читает текстовый файл, извлекает из него входные и выходные данные, разделяет их на обучающий и тестовый наборы и выводит их на экран, чтобы можно было проверить результат разбора текстового файла.

Я использовал 1 скрытый слой с 10 нейронами и 20000 эпох.

Входные данные программы следующие:

Входы программы

bpnet.h

#ifndef BPNET_H
#define BPNET_H

/*********************************Structure representing a neuron******************************/

/**
 * A single neuron of a layer.
 *
 * The neuron owns two heap arrays (allocated by create(), released by the
 * destructor), so copying is disabled: an implicit copy would leave two
 * objects freeing the same arrays.
 */
struct neuron
{
    float *weights;     // one input weight per layer input; allocated by create()
    float *deltavalues; // last weight change applied per input (used for the momentum term)
    float output;       // activation produced by the latest layer::calculate()
    float gain;         // bias input; create() sets it to 1
    float wgain;        // weight applied to the bias input

    neuron();  // leaves the neuron empty (null arrays, zero values)
    ~neuron(); // releases weights and deltavalues

    neuron(const neuron &) = delete;            // owning raw pointers: no implicit copies
    neuron &operator=(const neuron &) = delete;

    void create(int inputcount); // allocates the arrays and initialises the weights
};
/**************************************Structure representing a layer******************************/

/**
 * A layer of neurons together with the input vector fed to every neuron.
 *
 * The layer owns the neuron array, each neuron in it and the input buffer
 * (all released by the destructor), so copying is disabled to avoid a
 * double free.
 */
struct layer
{
    neuron **neurons;  // array of neuroncount owned neuron pointers
    int neuroncount;   // number of neurons in the layer
    float *layerinput; // input vector shared by all neurons of the layer
    int inputcount;    // number of elements in layerinput

    layer();  // initialises every member to 0

    ~layer(); // frees the neurons and the input buffer

    layer(const layer &) = delete;            // owning raw pointers: no implicit copies
    layer &operator=(const layer &) = delete;

    void create(int inputsize, int _neuroncount); // allocates _neuroncount neurons with inputsize inputs each
    void calculate(); // computes the sigmoid output of every neuron from layerinput
};
/********************************Structure Representing the network********************************/

/**
 * Feed-forward network trained with backpropagation.
 *
 * The network is an input layer, an optional chain of hidden layers and an
 * output layer. The hidden-layer array is owned by the object and released
 * by the destructor, so copying is disabled to avoid a double free.
 */
class bpnet
{
private:
    layer m_inputlayer;     // input layer of the network
    layer m_outputlayer;    // output layer; holds the result of propagate()
    layer **m_hiddenlayers; // owned array of hidden layers (null when there are none)
    int m_hiddenlayercount; // number of entries in m_hiddenlayers

public:

    bpnet();  // initialises the hidden-layer members to 0
    ~bpnet(); // releases the hidden layers

    bpnet(const bpnet &) = delete;            // owning raw pointers: no implicit copies
    bpnet &operator=(const bpnet &) = delete;

    // Builds the network structure in memory.
    //   inputcount       - number of values in one input pattern
    //   inputneurons     - number of neurons in the input layer
    //   outputcount      - number of neurons in the output layer
    //   hiddenlayers     - neuron count of each hidden layer (may be null)
    //   hiddenlayercount - number of entries in hiddenlayers
    void create(int inputcount,int inputneurons,int outputcount,int *hiddenlayers,int hiddenlayercount);

    // Calculates the network output for one input pattern.
    void propagate(const float *input);

    // Runs one backpropagation step for a single pattern and updates the
    // weights. alpha scales the weight change, momentum scales the previous
    // change that is added to it. Returns half the summed squared output error.
    float train(const float *desiredoutput,const float *input,float alpha, float momentum);

    // Copies the outputs of a layer into the input buffer of the next layer
    // (layerindex -1 denotes the input layer).
    void update(int layerindex);

    // Returns the output layer, which holds the output values of the network.
    inline layer &getOutput()
    {
        return m_outputlayer;
    }

};

#endif // BPNET_H

bpnet.cpp

#include "bpnet.h"

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <vector>

/*****************************neuron routines*******************************/

// Constructs an empty neuron: no arrays allocated, all scalar values zero.
neuron::neuron()
    : weights(nullptr),
      deltavalues(nullptr),
      output(0),
      gain(0),
      wgain(0)
{
}

// Releases the weight and delta arrays.
neuron::~neuron()
{
    // delete[] on a null pointer is a no-op, so no explicit null checks are needed.
    delete [] weights;
    delete [] deltavalues;
}

// Allocates the weight and delta arrays for `inputcount` inputs and
// initialises the neuron.
//
// Each weight gets a magnitude in [0, 0.5] drawn from rand(), with the sign
// alternating from one weight to the next (starting negative). All delta
// values start at 0, the gain is set to 1 and the gain weight is drawn the
// same way as the input weights.
void neuron::create(int inputcount)
{
    // A plain truthiness check would let negative counts through.
    assert(inputcount > 0);

    // Release arrays left over from an earlier create() call so that
    // re-creating a neuron does not leak. The pointers are cleared first so
    // the destructor stays safe if one of the allocations below throws.
    delete [] weights;
    weights = nullptr;
    delete [] deltavalues;
    deltavalues = nullptr;

    weights = new float[inputcount];
    deltavalues = new float[inputcount];

    float sign = -1; // flipped after every draw to alternate the sign

    for( int i = 0; i < inputcount; i++ )
    {
        const float magnitude = ( float(rand()) / float(RAND_MAX) ) / 2.f; // at most 0.5
        weights[i] = magnitude * sign;
        sign *= -1;
        deltavalues[i] = 0;
    }

    gain = 1;

    const float magnitude = ( float(rand()) / float(RAND_MAX) ) / 2.f; // at most 0.5
    wgain = magnitude * sign;
}
/***********************************Layer member functions********************************/

// Constructs an empty layer: no neurons, no input buffer.
layer::layer()
    : neurons(nullptr),
      neuroncount(0),
      layerinput(nullptr),
      inputcount(0)
{
}

// Destroys every neuron, then the neuron array and the input buffer.
layer::~layer()
{
    if( neurons != nullptr )
    {
        int idx = 0;
        while( idx < neuroncount )
        {
            delete neurons[idx];
            ++idx;
        }

        delete [] neurons;
    }

    // delete[] on a null pointer is a no-op.
    delete [] layerinput;
}

void layer::create( int inputsize, int _neuroncount )
{
assert( inputsize && _neuroncount );//check for errors

int i;

neurons = new neuron*[_neuroncount];
for( i = 0; i < _neuroncount; i++)
{
neurons[i] = new neuron;
neurons[i] -> create(inputsize);
}

layerinput = new float[inputsize];
neuroncount = _neuroncount;
inputcount = inputsize;
}//Calculates the neural network result of the layer using the sigmoid function
void layer::calculate()
{
int i,j;
float sum;

//Apply the formula for each neuron
for( i = 0; i < neuroncount; i++ )
{
sum = 0; //store the sum of all values here
for( j = 0; j < inputcount; j++ )
{
//Performing function
sum += neurons[i] -> weights[j] * layerinput[j]; //apply input * weight
}

sum += neurons[i] ->wgain * neurons[i] -> gain; //apply the gain or theta multiplied by the gain weight.

//sigmoidal activation function
neurons[i] -> output= 1.f / (1.f + exp(-sum)); //calculate the sigmoid function
//neurons[i]->output=-1 + 2*(1.f + exp(-sum));
}
}/***************************bpnet object functions**************/

// Constructs a network without hidden layers; create() builds the structure.
bpnet::bpnet()
    : m_hiddenlayers(nullptr),
      m_hiddenlayercount(0)
{
}

// Destroys every hidden layer and then the array that held them.
bpnet::~bpnet()
{
    if( m_hiddenlayers == nullptr )
    {
        return; // nothing was allocated
    }

    for( int idx = 0; idx < m_hiddenlayercount; ++idx )
    {
        delete m_hiddenlayers[idx];
    }

    delete [] m_hiddenlayers;
}

void bpnet::create(int inputcount, int inputneurons, int outputcount, int *hiddenlayers, int hiddenlayercount)
{

//make sure required values are not zero

assert(inputcount && inputneurons && outputcount);

int i;

m_inputlayer.create(inputcount,inputneurons);

if(hiddenlayers && hiddenlayercount)
{
m_hiddenlayers = new layer*[hiddenlayercount];
m_hiddenlayercount = hiddenlayercount;

for( i = 0; i < hiddenlayercount; i++ )
{
m_hiddenlayers[i] = new layer;
if( i==0 )
{
//first hidden layer receives the output of the inputlayer so we set as input the neuroncount
//of the inputlayer
m_hiddenlayers[i]->create(inputneurons,hiddenlayers[i]);
}
else
{
m_hiddenlayers[i]->create(hiddenlayers[i-1],hiddenlayers[i]);
}
}

m_outputlayer.create(hiddenlayers[hiddenlayercount - 1],outputcount);
}
else
{
m_outputlayer.create(inputneurons,outputcount);
}
}void bpnet::propagate(const float *input)
{
//The propagation function should start from the input layer
//first copy the input vector to the input layer Always make sure the size
//"array input" has the same size of inputcount
memcpy(m_inputlayer.layerinput,input,m_inputlayer.inputcount * sizeof(float));//now calculate the inputlayer
m_inputlayer.calculate();update(-1);//propagate the inputlayer out values to the next layerif(m_hiddenlayers)
{
//Calculating hidden layers if any
for(int i = 0; i < m_hiddenlayercount; i++ )
{
m_hiddenlayers[i] -> calculate();
update(i);
}
}

//calculating the final statge: the output layer
m_outputlayer.calculate();
}

//Main training function. Run this function in a loop as many times needed per pattern
float bpnet::train(const float *desiredoutput, const float *input, float alpha, float momentum)
{
//function train, teaches the network to recognize a pattern given a desired output

float errorg=0; //general quadratic error
float errorc; //local error;
float sum=0,csum=0;
float delta,udelta;
float output;

//first we begin by propagating the input
propagate(input);
int i,j,k;

//the backpropagation algorithm starts from the output layer propagating the error  from the output
//layer to the input layer

for( i = 0; i < m_outputlayer.neuroncount; i++ )
{
//calculate the error value for the output layer
output=m_outputlayer.neurons[i]->output; //copy this value to facilitate calculations

//from the algorithm we can take the error value as
errorc=(desiredoutput[i] - output) * output * (1 - output);

//and the general error as the sum of delta values. Where delta is the squared difference
//of the desired value with the output value
//quadratic error
errorg+=(desiredoutput[i] - output) * (desiredoutput[i] - output) ;

//now we proceed to update the weights of the neuron
for( j = 0; j < m_outputlayer.inputcount; j++ )
{
//get the current delta value
delta=m_outputlayer.neurons[i]->deltavalues[j];

//update the delta value
udelta = alpha * errorc * m_outputlayer.layerinput[j] + delta * momentum;

//update the weight values
m_outputlayer.neurons[i]->weights[j]+=udelta;
m_outputlayer.neurons[i]->deltavalues[j]=udelta;

//we need this to propagate to the next layer
sum += m_outputlayer.neurons[i]->weights[j] * errorc;
}

//calculate the weight gain
m_outputlayer.neurons[i]->wgain += alpha * errorc * m_outputlayer.neurons[i]->gain;

}

for(i = (m_hiddenlayercount - 1); i >= 0; i--)
{
for( j = 0; j < m_hiddenlayers[i]->neuroncount; j++ )
{
output = m_hiddenlayers[i]->neurons[j]->output;

//calculate the error for this layer
errorc = output * (1-output) * sum;

//update neuron weights
for( k = 0; k < m_hiddenlayers[i]->inputcount; k++ )
{
delta = m_hiddenlayers[i]->neurons[j]->deltavalues[k];
udelta = alpha * errorc * m_hiddenlayers[i]->layerinput[k] + delta * momentum;
m_hiddenlayers[i]->neurons[j]->weights[k] += udelta;
m_hiddenlayers[i]->neurons[j]->deltavalues[k] = udelta;
csum += m_hiddenlayers[i]->neurons[j]->weights[k] * errorc;//needed for next layer

}

m_hiddenlayers[i]->neurons[j]->wgain += alpha * errorc * m_hiddenlayers[i]->neurons[j]->gain;

}

sum = csum;
csum = 0;
}

//and finally process the input layer
for( i = 0; i < m_inputlayer.neuroncount; i++)
{
output = m_inputlayer.neurons[i]->output;
errorc = output * (1 - output) * sum;

for(j = 0; j < m_inputlayer.inputcount; j++)
{
delta = m_inputlayer.neurons[i]->deltavalues[j];
udelta = alpha * errorc * m_inputlayer.layerinput[j] + delta * momentum;
//update weights
m_inputlayer.neurons[i]->weights[j] += udelta;
m_inputlayer.neurons[i]->deltavalues[j] = udelta;
}
//and update the gain weight
m_inputlayer.neurons[i]->wgain+=alpha * errorc * m_inputlayer.neurons[i]->gain;
}

//return the general error divided by 2
return errorg / 2;

}

void bpnet::update(int layerindex)
{
int i;
if( layerindex == -1 )
{
//dealing with the inputlayer here and propagating to the next layer
for( i = 0; i < m_inputlayer.neuroncount; i++ )
{
if(m_hiddenlayers)//propagate to the first hidden layer
{
m_hiddenlayers[0]->layerinput[i] = m_inputlayer.neurons[i]->output;
}
else //propagate directly to the output layer
{
m_outputlayer.layerinput[i] = m_inputlayer.neurons[i]->output;
}
}

}
else
{
for( i = 0; i < m_hiddenlayers[layerindex]->neuroncount; i++)
{
//not the last hidden layer
if( layerindex < m_hiddenlayercount -1 )
{
m_hiddenlayers[layerindex + 1]->layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
}
else
{
m_outputlayer.layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
}
}
}
}

example.cpp

#include <iostream>
#include "bpnet.h"#include <string>
#include <sstream>
#include <fstream>
#include <vector>
using namespace std;
#define HIDDEN_LAYERS 10
#define HIDDEN_LAYER_COUNT 1

/**
 * Reads a comma-separated numeric data set from a text file.
 *
 * @param AN  number of attributes (columns) to read per line
 * @param IN  number of instances (rows) to read
 * @param FN  name of the file to read
 * @param in  number of inputs (not used by the reader)
 * @param ou  number of outputs (not used by the reader)
 * @param iop input/output position flag (not used by the reader)
 * @return a newly allocated IN x AN array; the caller owns it and must
 *         delete[] every row and then the row array itself.
 *
 * Every cell starts at 0, so rows or columns missing from the file (or the
 * whole table when the file cannot be opened) read as 0. Rows beyond IN and
 * columns beyond AN are ignored, and empty lines are skipped.
 * std::stof throws if a field is not a number.
 */
float** Read_Data_Form_file ( int AN, int IN, std::string FN, int in, int ou, std::string iop )
{
    // Accepted for interface compatibility only.
    (void)in;
    (void)ou;
    (void)iop;

    const int Att_Num = AN;  //number of attributes in the Dataset.
    const int Inst_Num = IN; //number of instants in the Dataset.

    //Create a zero-filled 2D array to save the result.
    float** data = new float*[Inst_Num];
    for ( int i = 0; i < Inst_Num; ++i )
    { data[i] = new float[Att_Num](); }

    //open file.
    std::ifstream DataSet(FN);
    if ( !DataSet.is_open() )
    {
        //print error message if the file not found.
        std::cout << "error, not found the file!" << std::endl;
        return data;
    }

    std::string line; //current data line.
    int lineNo = 0;   //index of the row being filled.

    //read the data line by line; the bound is checked BEFORE a row is
    //written, so surplus lines cannot overrun the array.
    while ( lineNo < Inst_Num && std::getline(DataSet, line) )
    {
        //tolerate Windows line endings and blank lines.
        if ( !line.empty() && line.back() == '\r' )
        { line.pop_back(); }
        if ( line.empty() )
        { continue; }

        std::stringstream linestream(line);
        std::string value;
        int propertyNo = 0; //index of the column being filled.

        //split the line on commas; surplus fields are ignored.
        while ( propertyNo < Att_Num && std::getline(linestream, value, ',') )
        {
            data[lineNo][propertyNo] = std::stof(value);
            ++propertyNo;
        }

        ++lineNo;
    }

    //the stream closes itself when it goes out of scope.
    return data;
}

int main()
{

int counter1 = 0,counter2 = 0,counter3 = 0,counter4 = 0;

int Att_Num; //number of attributes in the Dataset.
int Inst_Num; //number of instants in the Dataset.
int Inst_Num_train; //number of instants in the Dataset for training.
int Inst_Num_test; //number of instants in the Dataset for testing.
string File_name; //file name.

int inputs_Num; //number of inputs.
int outputs_Num; //number of outputs.

string input_output_possition = "false"; //define where is the inputs and outputs position at right or left.
// False: outputs in the left and inputs in the right.
// True: outputs in the right and inputs in the left.

int EPOCHS_NUM;

//Read the file name from the user.
cout << "Enter the file name (follow by .txt):  ";
cin >> File_name;
cout << endl;

//Read the attributes and instants numbers from the user.
cout << "Enter the attributes and instants ( for total, train and test ) numbers:  ";
cin >> Att_Num >> Inst_Num >> Inst_Num_train >> Inst_Num_test;
cout << endl;

//Read the input and outputs numbers from the user.
cout << "Enter the inputs and outputs numbers:  ";
cin >> inputs_Num >> outputs_Num;
cout << endl;

//Read the input and outputs possition.
cout << "Enter the inputs and outputs possition (true:right,false:left):  ";
cin >> input_output_possition;
cout << endl;

//Read the epochs.
cout << "Enter the epochs number :  ";
cin >> EPOCHS_NUM;
cout << endl;

//Process of Reading the dataSet from file and save in into array.
//Create 2D array to save result.
float** data = new float*[Inst_Num];
for(int i = 0; i < Inst_Num; ++i)
{ data[i] = new float[Att_Num]; }//Call the function to read data from file, and return the result.
data = Read_Data_Form_file(Att_Num,Inst_Num,File_name,inputs_Num,outputs_Num,input_output_possition);//________________________________________________________________________________________________________
//seprate the outputs and inputs in different arrays.
//FOR TRAINING PROCESS.
//________________________________________________________________________________________________________//Create 2D array to save result (inputs).
float** data_inputs = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ data_inputs[i] = new float[Att_Num - outputs_Num]; }

//Create 2D array to save result (outputs).
float** data_outputs = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ data_outputs[i] = new float[Att_Num - inputs_Num]; }if ( input_output_possition == "false" )
{
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int j = 0; j < outputs_Num; j++ )
{ data_outputs[i][j] = data[i][j]; }
}

for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int z = outputs_Num; z < Att_Num; z++)
{ data_inputs[i][z - outputs_Num] = data[i][z]; }
}
}
else if ( input_output_possition == "true" )
{
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int j = inputs_Num; j < Att_Num; j++ )
{ data_outputs[i][j - inputs_Num] = data[i][j]; }
}

for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int z = 0; z < inputs_Num - 1; z++)
{ data_inputs[i][z] = data[i][z]; }
}
}//print the result.
//inputs.
cout << "Intputs:" << endl;
for ( int i = 0; i < Inst_Num_train; i++ )
{
for ( int j = 0; j < inputs_Num; j++)
{ cout << data_inputs[i][j] << "\t"; }

//counter1++;
cout << endl;

}

//cout << "counter 1: " << counter1 << endl;;//outputs.
cout << "Outputs:" << endl;
for ( int i = 0; i < Inst_Num_train; i++ )
{
for ( int j = 0; j < outputs_Num; j++)
{ cout << data_outputs[i][j] << "\t"; }

//counter2++;
cout << endl;

}

//cout << "counter 2: " << counter2 << endl;

cout << " *********************************************************** " << endl;

//________________________________________________________________________________________________________
//seprate the outputs and inputs in different arrays.
//FOR TESTING PROCESS.
//________________________________________________________________________________________________________//Create 2D array to save result (inputs).
float** data_inputs2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ data_inputs2[i] = new float[Att_Num - outputs_Num]; }

//Create 2D array to save result (outputs).
float** data_outputs2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ data_outputs2[i] = new float[Att_Num - inputs_Num]; }if ( input_output_possition == "false" )
{
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int j = 0; j < outputs_Num; j++ )
{ data_outputs2[i - Inst_Num_train][j] = data[i][j]; }
}

for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int z = outputs_Num; z < Att_Num; z++)
{ data_inputs2[i - Inst_Num_train][z - outputs_Num] = data[i][z]; }
}
}
else if ( input_output_possition == "true" )
{
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int j = inputs_Num; j < Att_Num; j++ )
{ data_outputs2[i - Inst_Num_train][j - inputs_Num] = data[i][j]; }
}

for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int z = 0; z < inputs_Num - 1; z++)
{ data_inputs2[i - Inst_Num_train][z] = data[i][z]; }
}
}//print the result.
//inputs.
cout << "Intputs:" << endl;
for ( int i = 0; i < Inst_Num_test; i++ )
{
for ( int j = 0; j < inputs_Num; j++)
{ cout << data_inputs2[i][j] << "\t"; }

//counter3++;
cout << endl;

}

//cout << "counter 3: " << counter3;

//outputs.
cout << "Outputs:" << endl;
for ( int i = 0; i < Inst_Num_test; i++ )
{
for ( int j = 0; j < outputs_Num; j++)
{ cout << data_outputs2[i][j] << "\t"; }

//counter4++;
cout << endl;

}

//cout << "counter 4: " << counter4;

//________________________________________________________________________________________________________
//________________________________________________________________________________________________________//Create some patterns ( for train )
float** pattern = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ pattern[i] = new float[Att_Num - outputs_Num]; }

pattern = data_inputs;

//Desired output values ( for train )
float** desiredout = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ desiredout[i] = new float[Att_Num - inputs_Num]; }

desiredout = data_outputs;//=====================================================//Create some patterns ( for test )
float** pattern2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ pattern2[i] = new float[Att_Num - outputs_Num]; }

pattern2 = data_inputs2;

//Desired output values ( for test )
float** desiredout2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ desiredout2[i] = new float[Att_Num - inputs_Num]; }

desiredout2 = data_outputs2;

//-------------------------------------------------------------------bpnet net;//Our neural network object
int i,j;
float error; //save the error value.

//define the hidden layers and the number of neurons in each layer.
int hiddenlayerNeuronCount [HIDDEN_LAYER_COUNT] = {HIDDEN_LAYERS};
int hiddenlayercount = HIDDEN_LAYER_COUNT;//We create the network
//net.create(PATTERN_SIZE,NETWORK_INPUTNEURONS,NETWORK_OUTPUT,HIDDEN_LAYERS,HIDDEN_LAYERS);
net.create(Att_Num,inputs_Num,outputs_Num,hiddenlayerNeuronCount,hiddenlayercount);//Start the neural network training
for( i = 0 ; i < EPOCHS_NUM ; i++ )
{
error = 0 ;//make the value of error zero.

for( j = 0 ; j < Inst_Num_train ; j++ )
{
error += net.train(desiredout[j], pattern[j], 0.2f, 0.1f);
}

error /= Inst_Num_train;

//display error
cout << "ERROR:" << error << "\r";

}//once trained test all patterns

for( i = 0 ; i < Inst_Num_test ; i++ )
{

net.propagate( pattern2[i] );

//display result
cout << "TESTED PATTERN " << i+1 << " DESIRED OUTPUT: " << *desiredout2[i] << " NET RESULT: "<< net.getOutput().neurons[0]->output << endl;
}system("pause");
return 0;
}

Balance_Scale.txt

( Вот )

Вывод программы у меня такой:

Выход

Как видно на изображениях, у всех 125 тестовых шаблонов одна и та же ошибка!

0

backpropagation c++neural-network