Я работаю над реализацией алгоритма обратного распространения ошибки (BP) и взял за основу код из этого блога ( Вот ). В этот код я внёс некоторые изменения.
Проблема в том, что для всех шаблонов, которые я хочу протестировать, я получаю ошибку 1 — одну и ту же для всех тестовых шаблонов, что, конечно, неправильно.
Дополнительно я доработал example.cpp: теперь программа читает текстовый файл, извлекает из него входные и выходные данные, разделяет их на обучающий и тестовый наборы и печатает их, чтобы можно было проверить результат разбора текстового файла.
Я использовал 1 скрытый слой с 10 нейронами, а также 20000 эпох.
Исходные файлы программы следующие:
bpnet.h
#ifndef BPNET_H
#define BPNET_H
/*********************************Structure representing a neuron******************************/
// A single neuron: its per-input weights, the last weight updates (kept for
// the momentum term) and its most recent output.
// The two arrays are allocated by create() and released by the destructor, so
// the object is the sole owner of them. Copying is disabled because a
// member-wise copy would make two neurons free the same arrays.
struct neuron
{
    float *weights;     // neuron input weights or synaptic connections (owned array)
    float *deltavalues; // neuron delta values, one per weight (owned array)
    float output;       // output value
    float gain;         // gain value
    float wgain;        // weight gain value

    neuron();  // Constructor
    ~neuron(); // Destructor

    neuron(const neuron &) = delete;            // non-copyable: owns raw arrays
    neuron &operator=(const neuron &) = delete; // non-copyable: owns raw arrays

    void create(int inputcount); // Allocates memory and initializes values
};
/**************************************Structure representing a layer******************************/
// A layer of neurons together with the buffer holding the layer's input.
// The neuron array, the neurons it points to and the input buffer are
// allocated by create() and released by the destructor. Copying is disabled
// because a member-wise copy would make two layers free the same memory.
struct layer
{
    neuron **neurons;  // The array of neurons (owned, as are the neurons themselves)
    int neuroncount;   // Contains the total number of neurons
    float *layerinput; // The layer input (owned array)
    int inputcount;    // The total count of elements in layerinput

    layer();  // Object constructor. Initializes all values as 0
    ~layer(); // Destructor. Frees the memory used by the layer

    layer(const layer &) = delete;            // non-copyable: owns raw memory
    layer &operator=(const layer &) = delete; // non-copyable: owns raw memory

    void create(int inputsize, int _neuroncount); // Creates the layer and allocates memory
    void calculate();                             // Calculates all neurons performing the network formula
};
/********************************Structure Representing the network********************************/
// A feed-forward network: an input layer, an optional chain of hidden layers
// and an output layer, trained with backpropagation.
// The hidden layers are heap-allocated by create() and released by the
// destructor. Copying is disabled because a member-wise copy would make two
// networks free the same hidden layers.
class bpnet
{
private:
    layer m_inputlayer;     // input layer of the network
    layer m_outputlayer;    // output layer..contains the result of applying the network
    layer **m_hiddenlayers; // Additional hidden layers (owned)
    int m_hiddenlayercount; // the count of additional hidden layers

public:
    bpnet();  // Constructor..initializes all values to 0
    ~bpnet(); // Destructor..releases memory

    bpnet(const bpnet &) = delete;            // non-copyable: owns raw memory
    bpnet &operator=(const bpnet &) = delete; // non-copyable: owns raw memory

    // Creates the network structure in memory.
    //   inputcount       - number of values in one input pattern
    //   inputneurons     - number of neurons in the input layer
    //   outputcount      - number of neurons in the output layer
    //   hiddenlayers     - neuron count of each hidden layer (may be null)
    //   hiddenlayercount - number of entries in hiddenlayers
    void create(int inputcount,int inputneurons,int outputcount,int *hiddenlayers,int hiddenlayercount);

    // Calculates the network values given an input pattern
    void propagate(const float *input);

    // Updates the weight values of the network given a desired output,
    // applying the backpropagation algorithm
    float train(const float *desiredoutput,const float *input,float alpha, float momentum);

    // Updates the next layer input values
    void update(int layerindex);

    // Returns the output layer..this is useful to get the output values of the network
    inline layer &getOutput()
    {
        return m_outputlayer;
    }
};
#endif // BPNET_H
bpnet.cpp
#include "bpnet.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
/*****************************neuron routines*******************************/
//constructor
// Starts with no arrays allocated and every scalar field at zero.
neuron::neuron()
    : weights(nullptr),
      deltavalues(nullptr),
      output(0.f),
      gain(0.f),
      wgain(0.f)
{
}
//Destructor
// Releases both arrays; delete[] on a null pointer does nothing, so a neuron
// that was never create()d is handled without an explicit check.
neuron::~neuron()
{
    delete [] weights;
    delete [] deltavalues;
}
//Initializates neuron weights
// Allocates the weight and delta arrays for `inputcount` inputs.
// Each weight gets a random magnitude in [0, 0.5] whose sign alternates,
// starting negative; every delta value starts at 0. The gain is set to 1 and
// the gain weight is drawn the same way, continuing the sign alternation.
void neuron::create(int inputcount)
{
    assert(inputcount);

    weights = new float[inputcount];
    deltavalues = new float[inputcount];

    float sign = -1; // flipped after every random draw
    for( int idx = 0; idx < inputcount; ++idx, sign *= -1 )
    {
        const float magnitude = ( float(rand()) / float(RAND_MAX) ) / 2.f;
        weights[idx] = magnitude * sign;
        deltavalues[idx] = 0;
    }

    gain = 1;
    const float gainMagnitude = ( float(rand()) / float(RAND_MAX) ) / 2.f;
    wgain = gainMagnitude * sign;
}
/***********************************Layer member functions********************************/
// Starts as an empty layer: no neurons, no input buffer, both counts zero.
layer::layer()
    : neurons(nullptr),
      neuroncount(0),
      layerinput(nullptr),
      inputcount(0)
{
}
// Destroys every neuron, then the pointer array and the input buffer.
layer::~layer()
{
    if( neurons != nullptr )
    {
        int idx = 0;
        while( idx < neuroncount )
        {
            delete neurons[idx];
            ++idx;
        }
        delete [] neurons;
    }
    // delete[] on a null pointer does nothing, so no check is required here
    delete [] layerinput;
}
// Builds `_neuroncount` neurons that each take `inputsize` inputs, and
// allocates the buffer that holds this layer's input values.
void layer::create( int inputsize, int _neuroncount )
{
    assert( inputsize && _neuroncount ); // both sizes must be non-zero

    neurons = new neuron*[_neuroncount];
    int idx = 0;
    while( idx < _neuroncount )
    {
        neurons[idx] = new neuron;
        neurons[idx]->create(inputsize);
        ++idx;
    }

    layerinput = new float[inputsize];
    inputcount = inputsize;
    neuroncount = _neuroncount;
}
//Calculates the neural network result of the layer using the sigmoid function
void layer::calculate()
{
int i,j;
float sum;
//Apply the formula for each neuron
for( i = 0; i < neuroncount; i++ )
{
sum = 0; //store the sum of all values here
for( j = 0; j < inputcount; j++ )
{
//Performing function
sum += neurons[i] -> weights[j] * layerinput[j]; //apply input * weight
}
sum += neurons[i] ->wgain * neurons[i] -> gain; //apply the gain or theta multiplied by the gain weight.
//sigmoidal activation function
neurons[i] -> output= 1.f / (1.f + exp(-sum)); //calculate the sigmoid function
//neurons[i]->output=-1 + 2*(1.f + exp(-sum));
}
}/***************************bpnet object functions**************/
// Starts without hidden layers; the input and output layers are
// default-constructed members.
bpnet::bpnet()
    : m_hiddenlayers(nullptr),
      m_hiddenlayercount(0)
{
}
// Destroys the hidden layers and the array that holds them, if any were created.
bpnet::~bpnet()
{
    if( m_hiddenlayers == nullptr )
    {
        return;
    }
    int idx = 0;
    while( idx < m_hiddenlayercount )
    {
        delete m_hiddenlayers[idx];
        ++idx;
    }
    delete [] m_hiddenlayers;
}
// Builds the network structure.
// The input layer has `inputneurons` neurons that each read `inputcount`
// values. Every following layer takes as many inputs as the layer before it
// has neurons. When `hiddenlayers` is null or `hiddenlayercount` is zero the
// output layer is connected directly to the input layer.
void bpnet::create(int inputcount, int inputneurons, int outputcount, int *hiddenlayers, int hiddenlayercount)
{
    // make sure required values are not zero
    assert(inputcount && inputneurons && outputcount);

    m_inputlayer.create(inputcount, inputneurons);

    if( !(hiddenlayers && hiddenlayercount) )
    {
        m_outputlayer.create(inputneurons, outputcount);
        return;
    }

    m_hiddenlayers = new layer*[hiddenlayercount];
    m_hiddenlayercount = hiddenlayercount;

    int feeding = inputneurons; // neuron count of the layer feeding the next one
    for( int h = 0; h < hiddenlayercount; ++h )
    {
        m_hiddenlayers[h] = new layer;
        m_hiddenlayers[h]->create(feeding, hiddenlayers[h]);
        feeding = hiddenlayers[h];
    }
    m_outputlayer.create(feeding, outputcount);
}

// Runs one forward pass. `input` must hold at least as many floats as the
// input layer's inputcount; they are copied into the input layer, then every
// layer is calculated in order and its outputs are handed to the next layer.
void bpnet::propagate(const float *input)
{
    memcpy(m_inputlayer.layerinput, input, m_inputlayer.inputcount * sizeof(float));

    m_inputlayer.calculate();
    update(-1); // hand the input layer's outputs to the next layer

    // m_hiddenlayercount is zero when there are no hidden layers, so the loop
    // simply does not run in that case
    for( int h = 0; h < m_hiddenlayercount; ++h )
    {
        m_hiddenlayers[h]->calculate();
        update(h);
    }

    // final stage: the output layer
    m_outputlayer.calculate();
}
//Main training function. Run this function in a loop as many times needed per pattern
float bpnet::train(const float *desiredoutput, const float *input, float alpha, float momentum)
{
//function train, teaches the network to recognize a pattern given a desired output
float errorg=0; //general quadratic error
float errorc; //local error;
float sum=0,csum=0;
float delta,udelta;
float output;
//first we begin by propagating the input
propagate(input);
int i,j,k;
//the backpropagation algorithm starts from the output layer propagating the error from the output
//layer to the input layer
for( i = 0; i < m_outputlayer.neuroncount; i++ )
{
//calculate the error value for the output layer
output=m_outputlayer.neurons[i]->output; //copy this value to facilitate calculations
//from the algorithm we can take the error value as
errorc=(desiredoutput[i] - output) * output * (1 - output);
//and the general error as the sum of delta values. Where delta is the squared difference
//of the desired value with the output value
//quadratic error
errorg+=(desiredoutput[i] - output) * (desiredoutput[i] - output) ;
//now we proceed to update the weights of the neuron
for( j = 0; j < m_outputlayer.inputcount; j++ )
{
//get the current delta value
delta=m_outputlayer.neurons[i]->deltavalues[j];
//update the delta value
udelta = alpha * errorc * m_outputlayer.layerinput[j] + delta * momentum;
//update the weight values
m_outputlayer.neurons[i]->weights[j]+=udelta;
m_outputlayer.neurons[i]->deltavalues[j]=udelta;
//we need this to propagate to the next layer
sum += m_outputlayer.neurons[i]->weights[j] * errorc;
}
//calculate the weight gain
m_outputlayer.neurons[i]->wgain += alpha * errorc * m_outputlayer.neurons[i]->gain;
}
for(i = (m_hiddenlayercount - 1); i >= 0; i--)
{
for( j = 0; j < m_hiddenlayers[i]->neuroncount; j++ )
{
output = m_hiddenlayers[i]->neurons[j]->output;
//calculate the error for this layer
errorc = output * (1-output) * sum;
//update neuron weights
for( k = 0; k < m_hiddenlayers[i]->inputcount; k++ )
{
delta = m_hiddenlayers[i]->neurons[j]->deltavalues[k];
udelta = alpha * errorc * m_hiddenlayers[i]->layerinput[k] + delta * momentum;
m_hiddenlayers[i]->neurons[j]->weights[k] += udelta;
m_hiddenlayers[i]->neurons[j]->deltavalues[k] = udelta;
csum += m_hiddenlayers[i]->neurons[j]->weights[k] * errorc;//needed for next layer
}
m_hiddenlayers[i]->neurons[j]->wgain += alpha * errorc * m_hiddenlayers[i]->neurons[j]->gain;
}
sum = csum;
csum = 0;
}
//and finally process the input layer
for( i = 0; i < m_inputlayer.neuroncount; i++)
{
output = m_inputlayer.neurons[i]->output;
errorc = output * (1 - output) * sum;
for(j = 0; j < m_inputlayer.inputcount; j++)
{
delta = m_inputlayer.neurons[i]->deltavalues[j];
udelta = alpha * errorc * m_inputlayer.layerinput[j] + delta * momentum;
//update weights
m_inputlayer.neurons[i]->weights[j] += udelta;
m_inputlayer.neurons[i]->deltavalues[j] = udelta;
}
//and update the gain weight
m_inputlayer.neurons[i]->wgain+=alpha * errorc * m_inputlayer.neurons[i]->gain;
}
//return the general error divided by 2
return errorg / 2;
}
void bpnet::update(int layerindex)
{
int i;
if( layerindex == -1 )
{
//dealing with the inputlayer here and propagating to the next layer
for( i = 0; i < m_inputlayer.neuroncount; i++ )
{
if(m_hiddenlayers)//propagate to the first hidden layer
{
m_hiddenlayers[0]->layerinput[i] = m_inputlayer.neurons[i]->output;
}
else //propagate directly to the output layer
{
m_outputlayer.layerinput[i] = m_inputlayer.neurons[i]->output;
}
}
}
else
{
for( i = 0; i < m_hiddenlayers[layerindex]->neuroncount; i++)
{
//not the last hidden layer
if( layerindex < m_hiddenlayercount -1 )
{
m_hiddenlayers[layerindex + 1]->layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
}
else
{
m_outputlayer.layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
}
}
}
}
example.cpp
#include <iostream>
#include "bpnet.h"#include <string>
#include <sstream>
#include <fstream>
#include <vector>
using namespace std;
#define HIDDEN_LAYERS 10
#define HIDDEN_LAYER_COUNT 1

// Reads a comma-separated text file of numbers into a newly allocated
// IN x AN array of floats and returns it.
//   AN  - number of attributes (columns) per instance
//   IN  - number of instances (rows) to read
//   FN  - name of the file to open
//   in, ou, iop - number of inputs, number of outputs and the input/output
//                 position flag; kept so existing callers compile, they do
//                 not influence the parsing
// The caller owns the result and must delete[] every row and then the array.
// Every cell starts at 0, so rows or columns missing from the file read as 0.
// Lines beyond IN and values beyond AN on a line are ignored rather than
// written past the end of the array. Blank lines are skipped. A trailing
// carriage return is stripped so files with Windows line endings parse.
// If the file cannot be opened a message is printed and the zero-filled array
// is returned. std::stof throws for a value that is not a number.
float** Read_Data_Form_file ( int AN, int IN, std::string FN, int in, int ou, std::string iop )
{
    (void)in;
    (void)ou;
    (void)iop;

    //Create 2D array to save result; "()" zero-initialises every row.
    float** data = new float*[IN];
    for( int row = 0; row < IN; ++row )
    { data[row] = new float[AN](); }

    std::ifstream DataSet(FN);
    if ( !DataSet.is_open() )
    {
        //print error message if the file not found.
        std::cout << "error, not found the file!" << std::endl;
        return data;
    }

    std::string line;
    int lineNo = 0; //row being filled
    //stop as soon as the array is full, before reading a line that cannot be stored
    while ( lineNo < IN && std::getline(DataSet, line) )
    {
        if ( !line.empty() && line[line.size() - 1] == '\r' )
        { line.erase(line.size() - 1); }
        if ( line.empty() )
        { continue; }

        std::stringstream linestream(line);
        std::string value;
        int propertyNo = 0; //column being filled
        //the bound is checked before the write, so extra values are dropped
        while ( propertyNo < AN && std::getline(linestream, value, ',') )
        {
            data[lineNo][propertyNo] = std::stof(value);
            ++propertyNo;
        }
        ++lineNo;
    }
    //the stream closes itself when DataSet goes out of scope
    return data;
}
int main()
{
int counter1 = 0,counter2 = 0,counter3 = 0,counter4 = 0;
int Att_Num; //number of attributes in the Dataset.
int Inst_Num; //number of instants in the Dataset.
int Inst_Num_train; //number of instants in the Dataset for training.
int Inst_Num_test; //number of instants in the Dataset for testing.
string File_name; //file name.
int inputs_Num; //number of inputs.
int outputs_Num; //number of outputs.
string input_output_possition = "false"; //define where is the inputs and outputs position at right or left.
// False: outputs in the left and inputs in the right.
// True: outputs in the right and inputs in the left.
int EPOCHS_NUM;
//Read the file name from the user.
cout << "Enter the file name (follow by .txt): ";
cin >> File_name;
cout << endl;
//Read the attributes and instants numbers from the user.
cout << "Enter the attributes and instants ( for total, train and test ) numbers: ";
cin >> Att_Num >> Inst_Num >> Inst_Num_train >> Inst_Num_test;
cout << endl;
//Read the input and outputs numbers from the user.
cout << "Enter the inputs and outputs numbers: ";
cin >> inputs_Num >> outputs_Num;
cout << endl;
//Read the input and outputs possition.
cout << "Enter the inputs and outputs possition (true:right,false:left): ";
cin >> input_output_possition;
cout << endl;
//Read the epochs.
cout << "Enter the epochs number : ";
cin >> EPOCHS_NUM;
cout << endl;
//Process of Reading the dataSet from file and save in into array.
//Create 2D array to save result.
float** data = new float*[Inst_Num];
for(int i = 0; i < Inst_Num; ++i)
{ data[i] = new float[Att_Num]; }//Call the function to read data from file, and return the result.
data = Read_Data_Form_file(Att_Num,Inst_Num,File_name,inputs_Num,outputs_Num,input_output_possition);//________________________________________________________________________________________________________
//seprate the outputs and inputs in different arrays.
//FOR TRAINING PROCESS.
//________________________________________________________________________________________________________//Create 2D array to save result (inputs).
float** data_inputs = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ data_inputs[i] = new float[Att_Num - outputs_Num]; }
//Create 2D array to save result (outputs).
float** data_outputs = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ data_outputs[i] = new float[Att_Num - inputs_Num]; }if ( input_output_possition == "false" )
{
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int j = 0; j < outputs_Num; j++ )
{ data_outputs[i][j] = data[i][j]; }
}
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int z = outputs_Num; z < Att_Num; z++)
{ data_inputs[i][z - outputs_Num] = data[i][z]; }
}
}
else if ( input_output_possition == "true" )
{
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int j = inputs_Num; j < Att_Num; j++ )
{ data_outputs[i][j - inputs_Num] = data[i][j]; }
}
for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
{
for ( int z = 0; z < inputs_Num - 1; z++)
{ data_inputs[i][z] = data[i][z]; }
}
}//print the result.
//inputs.
cout << "Intputs:" << endl;
for ( int i = 0; i < Inst_Num_train; i++ )
{
for ( int j = 0; j < inputs_Num; j++)
{ cout << data_inputs[i][j] << "\t"; }
//counter1++;
cout << endl;
}
//cout << "counter 1: " << counter1 << endl;;//outputs.
cout << "Outputs:" << endl;
for ( int i = 0; i < Inst_Num_train; i++ )
{
for ( int j = 0; j < outputs_Num; j++)
{ cout << data_outputs[i][j] << "\t"; }
//counter2++;
cout << endl;
}
//cout << "counter 2: " << counter2 << endl;
cout << " *********************************************************** " << endl;
//________________________________________________________________________________________________________
//seprate the outputs and inputs in different arrays.
//FOR TESTING PROCESS.
//________________________________________________________________________________________________________//Create 2D array to save result (inputs).
float** data_inputs2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ data_inputs2[i] = new float[Att_Num - outputs_Num]; }
//Create 2D array to save result (outputs).
float** data_outputs2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ data_outputs2[i] = new float[Att_Num - inputs_Num]; }if ( input_output_possition == "false" )
{
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int j = 0; j < outputs_Num; j++ )
{ data_outputs2[i - Inst_Num_train][j] = data[i][j]; }
}
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int z = outputs_Num; z < Att_Num; z++)
{ data_inputs2[i - Inst_Num_train][z - outputs_Num] = data[i][z]; }
}
}
else if ( input_output_possition == "true" )
{
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int j = inputs_Num; j < Att_Num; j++ )
{ data_outputs2[i - Inst_Num_train][j - inputs_Num] = data[i][j]; }
}
for ( int i = Inst_Num_train; i < Inst_Num ; i++)
{
for ( int z = 0; z < inputs_Num - 1; z++)
{ data_inputs2[i - Inst_Num_train][z] = data[i][z]; }
}
}//print the result.
//inputs.
cout << "Intputs:" << endl;
for ( int i = 0; i < Inst_Num_test; i++ )
{
for ( int j = 0; j < inputs_Num; j++)
{ cout << data_inputs2[i][j] << "\t"; }
//counter3++;
cout << endl;
}
//cout << "counter 3: " << counter3;
//outputs.
cout << "Outputs:" << endl;
for ( int i = 0; i < Inst_Num_test; i++ )
{
for ( int j = 0; j < outputs_Num; j++)
{ cout << data_outputs2[i][j] << "\t"; }
//counter4++;
cout << endl;
}
//cout << "counter 4: " << counter4;
//________________________________________________________________________________________________________
//________________________________________________________________________________________________________//Create some patterns ( for train )
float** pattern = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ pattern[i] = new float[Att_Num - outputs_Num]; }
pattern = data_inputs;
//Desired output values ( for train )
float** desiredout = new float*[Inst_Num_train];
for(int i = 0; i < Inst_Num_train; ++i)
{ desiredout[i] = new float[Att_Num - inputs_Num]; }
desiredout = data_outputs;//=====================================================//Create some patterns ( for test )
float** pattern2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ pattern2[i] = new float[Att_Num - outputs_Num]; }
pattern2 = data_inputs2;
//Desired output values ( for test )
float** desiredout2 = new float*[Inst_Num_test];
for(int i = 0; i < Inst_Num_test; ++i)
{ desiredout2[i] = new float[Att_Num - inputs_Num]; }
desiredout2 = data_outputs2;
//-------------------------------------------------------------------bpnet net;//Our neural network object
int i,j;
float error; //save the error value.
//define the hidden layers and the number of neurons in each layer.
int hiddenlayerNeuronCount [HIDDEN_LAYER_COUNT] = {HIDDEN_LAYERS};
int hiddenlayercount = HIDDEN_LAYER_COUNT;//We create the network
//net.create(PATTERN_SIZE,NETWORK_INPUTNEURONS,NETWORK_OUTPUT,HIDDEN_LAYERS,HIDDEN_LAYERS);
net.create(Att_Num,inputs_Num,outputs_Num,hiddenlayerNeuronCount,hiddenlayercount);//Start the neural network training
for( i = 0 ; i < EPOCHS_NUM ; i++ )
{
error = 0 ;//make the value of error zero.
for( j = 0 ; j < Inst_Num_train ; j++ )
{
error += net.train(desiredout[j], pattern[j], 0.2f, 0.1f);
}
error /= Inst_Num_train;
//display error
cout << "ERROR:" << error << "\r";
}//once trained test all patterns
for( i = 0 ; i < Inst_Num_test ; i++ )
{
net.propagate( pattern2[i] );
//display result
cout << "TESTED PATTERN " << i+1 << " DESIRED OUTPUT: " << *desiredout2[i] << " NET RESULT: "<< net.getOutput().neurons[0]->output << endl;
}system("pause");
return 0;
}
Balance_Scale.txt
( Вот )
Вывод программы у меня получается такой:
Как показано на изображениях, у всех 125 тестовых шаблонов одна и та же ошибка!
Задача ещё не решена.
Других решений пока нет …