So I'm working on implementing a backprop neural network:
Like every neural-network newbie, I wrote my own 'NEURON' class.
However, I'm getting strange results: when the dataset is small (as with the XOR function, where there are only 4 possible input patterns: 00, 01, 10, 11), the neuron's outputs end up very close to each other, no matter how many training iterations (epochs) I run.
For example: 1 XOR 1 gives me 0.987 and 1 XOR 0 gives me 0.986. Shouldn't those two be far apart?
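For reference, the whole training set is just the XOR truth table (written here as the array the sketch further below uses):

//The complete XOR training set: {in1, in2, target}
const double XOR_SET[4][3] = {
{ 0, 0, 0 },
{ 0, 1, 1 },
{ 1, 0, 1 },
{ 1, 1, 0 }
};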
Here is the class code, in case it helps:
#pragma once
#include <vector>
#include <iostream>
#include "Math.h"#include "RandomizationUtils.h"
using namespace std;
class ClNeuron
{
public:
enum NEURON_TYPE { NEURON_TYPE_INPUT=1,NEURON_TYPE_HIDDEN=2,NEURON_TYPE_OUTPUT=3 };
private:
static const int CONST_DEFAULT_INPUT_NUMBER_PER_NEURON = 20;
static constexpr double CONST_DEFAULT_MOMENTUM_VALUE = 0.4; //constexpr needed: in-class init of a plain static const double does not compile
//Connection between 2 neurons
struct NEURON_CONNECTION
{
double m_weight;
double m_data;
//Last modification done to the weight
double m_weight_last_delta;
double m_momentum_value;
ClNeuron* m_source_neuron;
ClNeuron* m_target_neuron;
};
//Initialization function
void Init(unsigned long p_uid,NEURON_TYPE p_type);
bool m_initialized;
//All of the output connections of this neuron
vector<NEURON_CONNECTION*> m_output_connections;
//All of the input connections of this neuron
vector<NEURON_CONNECTION*> m_input_connections;
//Tmp internal result buffer (sum of all weights multiplied by their inputs)
double m_result_buffer;
//special weight that always has an input of 1.0
NEURON_CONNECTION m_bias;
public:
//the type of this neuron
NEURON_TYPE m_type;
ClNeuron(NEURON_TYPE p_type);
ClNeuron(unsigned long p_uid,NEURON_TYPE p_type);
ClNeuron(unsigned long p_uid);
ClNeuron();
//Connect this neuron's output to another neuron's input
bool AddOutputConnection(ClNeuron* p_neuron);
//This neuron got a request to have a new input
NEURON_CONNECTION* InputConnectionRequest(ClNeuron* p_source_neuron);
//Tell the neuron to fire the sum of the processed inputs
double Fire();
//Tell the neuron to fire a particular data
double Fire(double p_data);
//Update the weights of this neuron's OUTPUT connections, based on an error term
void UpdateWeights(double p_wanted_output);
//Sum all the weight * their respective inputs into an internal buffer
void ProcessInputs();
//Print neuron & connections & weights
void PrintNeuronData();
//Unique ID of this neuron
unsigned long m_uid;
//This neuron's calculated error gradient
double m_error_gradient;
};
ClNeuron::NEURON_CONNECTION* ClNeuron::InputConnectionRequest(ClNeuron* p_neuron)
{
NEURON_CONNECTION* connection = new (std::nothrow) NEURON_CONNECTION; //nothrow, so the NULL check below is meaningful (plain new throws)
if(!connection)
{
cout << "Error creating new connection, memory full ?" << endl << flush;
return NULL;
}
connection->m_weight = GetRandomDouble(-1,1);
connection->m_data = 0;
connection->m_weight_last_delta = 0; //must start at 0, otherwise the momentum term reads garbage
connection->m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
connection->m_source_neuron = p_neuron;
connection->m_target_neuron = this;
m_input_connections.push_back(connection);
return connection;
}
bool ClNeuron::AddOutputConnection(ClNeuron* p_neuron)
{
//If the remote neuron accepts us as a new input, add the connection to our output list
NEURON_CONNECTION* connection = p_neuron->InputConnectionRequest(this);
if(!connection)
{
return false;
}
m_output_connections.push_back(connection);
return true;
}
double ClNeuron::Fire()
{
return Fire(m_result_buffer);
}
double ClNeuron::Fire(double p_data)
{
//Remember what is being fired so UpdateWeights() can use it
//(input neurons never call ProcessInputs, so their m_result_buffer would otherwise stay 0)
m_result_buffer = p_data;
if(m_output_connections.size()==0)
{
cout << "Final neuron " << m_uid << " return " << p_data << endl;
return p_data;
}
for(unsigned long i=0;i<m_output_connections.size();i++)
{
m_output_connections[i]->m_data = p_data;
}
return 1;
}
void ClNeuron::ProcessInputs()
{
m_result_buffer = 0;
for(unsigned long i=0;i<m_input_connections.size();i++)
{
m_result_buffer += m_input_connections[i]->m_weight * m_input_connections[i]->m_data;
}
m_result_buffer += m_bias.m_weight; //the bias contributes weight * 1.0
//sigmoid the sum
m_result_buffer = Sigmoid(m_result_buffer);
}
void ClNeuron::UpdateWeights(double p_wanted_output)
{
//Update the weights from this neuron to all of its inputs.
//NOTE: p_wanted_output is the desired output of THIS neuron (relevant when the network has several output neurons)
if(m_type == NEURON_TYPE_OUTPUT)
{
//delta = (target - actual) * f'(output), the standard output-layer gradient
m_error_gradient = (p_wanted_output - m_result_buffer) * SigmoidDerivative(m_result_buffer);
//Adjust the bias of this neuron
double weight_delta = 1 /*learning rate*/ * m_error_gradient * 1 /*bias input is always 1.0*/;
double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
m_bias.m_weight += weight_delta + momentum;
m_bias.m_weight_last_delta = weight_delta;
}
else if(m_type == NEURON_TYPE_HIDDEN)
{
double error_derivative = SigmoidDerivative(m_result_buffer);
double tmpBuffer = 0.00;
for(unsigned long i=0;i<m_output_connections.size();i++)
{
tmpBuffer += (m_output_connections[i]->m_target_neuron->m_error_gradient * m_output_connections[i]->m_weight);
}
m_error_gradient = error_derivative * tmpBuffer;
//Adjust the weights of this neuron's OUTPUT connections
for(unsigned long i=0;i<m_output_connections.size();i++)
{
double weight_delta = 1 /*learning rate*/ * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer;
double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
m_output_connections[i]->m_weight += weight_delta + momentum;
m_output_connections[i]->m_weight_last_delta = weight_delta;
}
//Adjust the bias of this neuron
double weight_delta = 1 /*learning rate*/ * m_error_gradient * 1 /*bias input is always 1.0*/;
double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
m_bias.m_weight += weight_delta + momentum;
m_bias.m_weight_last_delta = weight_delta;
}
else if(m_type == NEURON_TYPE_INPUT)
{
//Adjust the weights for this neuron's OUTPUT connections
for(unsigned long i=0;i<m_output_connections.size();i++)
{
double weight_delta = 1 /*learning rate*/ * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer;
double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
m_output_connections[i]->m_weight += weight_delta + momentum;
m_output_connections[i]->m_weight_last_delta = weight_delta;
}
}
}
void ClNeuron::PrintNeuronData()
{
cout << endl << "========================================" << endl;
cout << "Neuron #" << m_uid << " has " << m_input_connections.size() << " input connection" << endl << endl;
for(unsigned long i=0;i<m_input_connections.size();i++)
{
cout << "----> " << "conn." << i << " | Src ID: " << m_input_connections[i]->m_source_neuron->m_uid << " | W: "<< m_input_connections[i]->m_weight << " | D: "<< m_input_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;
}
cout << "Neuron #" << m_uid << " has " << m_output_connections.size() << " output connection" << endl << endl;
for(unsigned long i=0;i<m_output_connections.size();i++)
{
cout << "----> " << "conn." << i << " | Dst ID: " << m_output_connections[i]->m_target_neuron->m_uid << " | W: "<< m_output_connections[i]->m_weight << " | D: "<< m_output_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;
}
cout << endl << "========================================" << endl;
}
void ClNeuron::Init(unsigned long p_uid,NEURON_TYPE p_type)
{
m_initialized = false;
m_output_connections.clear();
m_input_connections.clear();
m_input_connections.reserve(CONST_DEFAULT_INPUT_NUMBER_PER_NEURON);
m_type = p_type;
m_uid = p_uid ? p_uid : (unsigned long)rand(); //use the supplied uid, or fall back to a random one
m_result_buffer = 0;
m_bias.m_weight = GetRandomDouble(-1,1);
m_bias.m_data = 0;
m_bias.m_weight_last_delta = 0; //must start at 0, otherwise the momentum term reads garbage
m_bias.m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
m_bias.m_source_neuron = NULL;
m_bias.m_target_neuron = this;
m_initialized = true;
}
ClNeuron::ClNeuron(unsigned long p_uid,NEURON_TYPE p_type)
{
Init(p_uid,p_type);
}
ClNeuron::ClNeuron(NEURON_TYPE p_type)
{
Init(0,p_type);
}
ClNeuron::ClNeuron(unsigned long p_uid)
{
Init(p_uid,NEURON_TYPE_HIDDEN);
}
ClNeuron::ClNeuron()
{
Init(0,NEURON_TYPE_HIDDEN);
}
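For context, here is a minimal sketch of how a 2-2-1 network built from this class gets wired and trained on the XOR set above; this is illustrative only, my actual wiring/training code lives in a separate network class:

//Sketch only: a 2-2-1 XOR network made of ClNeuron objects
ClNeuron in1(1,ClNeuron::NEURON_TYPE_INPUT);
ClNeuron in2(2,ClNeuron::NEURON_TYPE_INPUT);
ClNeuron hid1(3,ClNeuron::NEURON_TYPE_HIDDEN);
ClNeuron hid2(4,ClNeuron::NEURON_TYPE_HIDDEN);
ClNeuron out(5,ClNeuron::NEURON_TYPE_OUTPUT);
//Fully connect input -> hidden -> output
in1.AddOutputConnection(&hid1); in1.AddOutputConnection(&hid2);
in2.AddOutputConnection(&hid1); in2.AddOutputConnection(&hid2);
hid1.AddOutputConnection(&out); hid2.AddOutputConnection(&out);
for(int epoch=0;epoch<10000;epoch++)
{
for(int p=0;p<4;p++)
{
//Forward pass: input neurons fire raw data, later layers sum their inputs and fire
in1.Fire(XOR_SET[p][0]);
in2.Fire(XOR_SET[p][1]);
hid1.ProcessInputs(); hid1.Fire();
hid2.ProcessInputs(); hid2.Fire();
out.ProcessInputs(); out.Fire();
//Backward pass: output layer first, then hidden, then input
out.UpdateWeights(XOR_SET[p][2]);
hid1.UpdateWeights(0); hid2.UpdateWeights(0); //parameter is unused for non-output neurons
in1.UpdateWeights(0); in2.UpdateWeights(0);
}
}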
The problem turned out to be the BIAS weight of each neuron: more precisely, the error gradient used for the bias was always 0 (making its weight_delta 0), which ultimately meant the bias weights were never updated.
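In other words, the bias has to be treated like any other weight, just with a constant input of 1.0, and its delta has to come from the neuron's OWN error gradient; that is what the corrected bias update in UpdateWeights above does:

//Corrected bias update from UpdateWeights above: the delta is driven by
//this neuron's own error gradient, with an implicit input of 1.0
double weight_delta = 1 /*learning rate*/ * m_error_gradient * 1 /*bias input*/;
double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
m_bias.m_weight += weight_delta + momentum;
m_bias.m_weight_last_delta = weight_delta;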