У меня проблемы с реализацией обратного распространения в нейронной сети

Question

У меня проблемы с реализацией обратного распространения в нейронной сети

У меня есть простая нейронная сеть прямого распространения с 2 входными нейронами (и 1 нейроном смещения), 4 скрытыми нейронами (и 1 нейроном смещения) и одним выходным нейроном.
Механизм прямого распространения, кажется, работает нормально, но мне трудно до конца понять, как реализовать алгоритм обратного распространения.

Есть 3 класса:

Neural::Net — строит сеть и передаёт входные значения вперёд (обратного распространения пока нет).
Neural::Neuron — хранит характеристики нейрона (индекс, выход, вес и т. д.).
Neural::Connection — структурный класс, который рандомизирует веса и хранит выход, дельту веса и т. д.

Сразу уточню: я сейчас прохожу курс математического анализа, поэтому понимаю лишь некоторые из нужных понятий; тема для меня довольно продвинутая, но я всё равно хочу заставить сеть работать.

Передаточная функция является логистической функцией. Веса синапсов «привязаны» к нейрону, выводящему значение.

Это моя попытка использовать функцию обратного распространения:

// Excerpt of Net::backPropagate as posted in the question (the function's
// closing brace is not part of this excerpt).
// For every output neuron it stores an error term and adjusts that neuron's
// weight; it then walks the hidden layers from the last one towards the front
// and does the same for each of their neurons.
void Net::backPropagate(const vector<double>& targetVals) {
Layer& outputLayer = myLayers.back();
assert(targetVals.size() == outputLayer.size());
cout << "good2" << endl;
// Starting with the output layer
for (unsigned int i = 0; i < outputLayer.size(); ++i) { // Traversing output layer
double output = outputLayer[i].getOutput(); cout << "good3" << endl;
// NOTE(review): the difference (target - output) is squared here, so its
// sign is discarded before the value is used as the error term.
double error = output * (1 - output) * (pow(targetVals[i] - output,2)); cout << "good4" << endl;
outputLayer[i].setError(error); // Calculating error
double newWeight = outputLayer[i].getWeight();
// The step added to the weight is error * this neuron's own output.
newWeight += (error * outputLayer[i].getOutput());
outputLayer[i].setWeight(newWeight); // Setting new weight
cout << "good5" << endl;
}

// NOTE(review): the loop condition is i > 0, so layer 0 itself is not visited.
for (unsigned int i = myLayers.size() - 2; i > 0; --i) { // Traversing hidden layers all the way to input layer
Layer& currentLayer = myLayers[i];
Layer& nextLayer = myLayers[i + 1];
for (unsigned int j = 0; j < currentLayer.size(); ++j) { // Traversing current layer
const double& output = currentLayer[j].getOutput();
double subSum = 0.0; // Initializing subsum
for (unsigned int k = 0; k < nextLayer.size(); ++k) { // Traversing next layer
// NOTE(review): each term multiplies the next layer's error by the weight of
// currentLayer[j] (not of nextLayer[k]) and then squares the product.
subSum += pow(nextLayer[k].getError() * currentLayer[j].getWeight(),2); // Getting their backpropagated error and weight
}
double error = output*(1 - output)*(subSum);
currentLayer[j].setError(error);
double newWeight = currentLayer[j].getWeight();
newWeight += error * output;
currentLayer[j].setWeight(newWeight);
}
}

Я пытался обучить свою сеть:

Вход {1,1} -> Выход {0}
Вход {0,0} -> Выход {1}

Но выходы для обоих очень близки к 1 (~ 0,998), независимо от того, сколько раз я тренируюсь, так что, очевидно, что-то не так.

// STL_Practice.cpp : Defines the entry point for the console application.
//
#include <time.h>

#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "ConsoleColor.hpp"
using namespace std;

namespace Neural {
class Neuron;
typedef vector<Neuron> Layer;

// ******************** Class: Connection ******************** //
/// Small value holder used by a neuron: it keeps the most recent output value,
/// the current weight, and the difference recorded by the last weight change.
/// A freshly constructed Connection starts with output 0 and a weight drawn
/// from rand() scaled into [0, 1]; the drawn weight is printed to stdout.
class Connection {
public:
    Connection();

    // --- read access ---
    double getOutput() const { return myOutputVal; }
    double getWeight() const { return myWeight; }

    // --- write access ---
    void setOutput(const double& outputVal) { myOutputVal = outputVal; }

    /// Replaces the weight and records (previous weight - new weight).
    void setWeight(const double& weight) {
        myDeltaWeight = myWeight - weight;
        myWeight = weight;
    }

private:
    /// rand() mapped onto the closed interval [0, 1].
    static double randomizeWeight() {
        return static_cast<double>(rand()) / RAND_MAX;
    }

    double myOutputVal;
    double myWeight;
    double myDeltaWeight;
};

// Members are initialised in declaration order, so myDeltaWeight can safely
// copy the weight that was just drawn.
Connection::Connection()
    : myOutputVal(0),
      myWeight(Connection::randomizeWeight()),
      myDeltaWeight(myWeight) {
    std::cout << "Weight: " << myWeight << std::endl;
}

// ******************** Class: Neuron ************************ //
/// One unit of the network.
///
/// A Neuron owns a single Connection, which stores both the neuron's output
/// value and its one weight. It also carries its index inside its layer and the
/// error term written by back-propagation.
class Neuron {
public:
    /// Index and error term start at 0; the Connection picks its own weight.
    Neuron();

    void setIndex(const unsigned int& index) { myIndex = index; }
    void setOutput(const double& output) { myConnection.setOutput(output); }
    void setWeight(const double& weight) { myConnection.setWeight(weight); }
    void setError(const double& error) { myError = error; }

    unsigned int getIndex(void) const { return myIndex; }
    double getOutput(void) const { return myConnection.getOutput(); }
    double getWeight(void) const { return myConnection.getWeight(); }
    double getError(void) const { return myError; }

    /// Recomputes this neuron's output from the outputs of prevLayer.
    void feedForward(const Layer& prevLayer);

    /// Prints "Neuron <index>:<output>" on its own line.
    void printOutput(void) const;

private:
    /// Logistic function 1 / (1 + e^-x).
    inline static double transfer(const double& weightedSum);

    Connection myConnection;
    unsigned int myIndex;
    double myError;
};

// Initialisers are listed in declaration order (the order in which they
// actually run), and myError now starts at a defined value instead of being
// left indeterminate until the first setError() call.
Neuron::Neuron() : myConnection(), myIndex(0), myError(0.0) { }

double Neuron::transfer(const double& weightedSum) {
    return 1.0 / (1.0 + std::exp(-weightedSum));
}

void Neuron::printOutput(void) const {
    std::cout << "Neuron " << myIndex << ':' << myConnection.getOutput() << std::endl;
}

void Neuron::feedForward(const Layer& prevLayer) {
    // Every output of the previous layer is scaled by this neuron's single
    // weight and accumulated; the running total is traced after each term.
    double weightedSum = 0;
    for (unsigned int i = 0; i < prevLayer.size(); ++i) {
        weightedSum += prevLayer[i].getOutput() * myConnection.getWeight();
        std::cout << "Neuron " << i << " from prevLayer has output: " << prevLayer[i].getOutput() << std::endl;
        std::cout << "Weighted sum: " << weightedSum << std::endl;
    }
    // Squash the sum through the logistic function and store it as the output.
    myConnection.setOutput(Neuron::transfer(weightedSum));
    std::cout << "Transfer: " << myConnection.getOutput() << std::endl;
}

// ******************** Class: Net *************************** //
/// A feed-forward network stored as an ordered list of layers
/// (index 0 is the input layer, the last entry is the output layer).
class Net {
public:
    /// Builds one layer per entry of topology.
    Net(const std::vector<unsigned int>& topology);

    // Forward pass and training step.
    void feedForward(const std::vector<double>& inputVals);
    void backPropagate(const std::vector<double>& targetVals);

    // NOTE(review): declared only; no definition is visible in this file.
    void setTarget(const std::vector<double>& targetVals);

    /// Prints every neuron of the last layer.
    void printOutput() const;

private:
    std::vector<Layer> myLayers;
};
// Builds the layers described by topology. Every layer except the last gets one
// extra neuron (the bias neuron) whose output is pinned to 1. Each neuron is
// told its position inside its layer, and a coloured line is logged per layer.
Net::Net(const vector<unsigned int>& topology) {
    assert(!topology.empty());

    const unsigned int layerCount = topology.size();
    for (unsigned int layerIdx = 0; layerIdx != layerCount; ++layerIdx) {
        const bool isLast = (layerIdx + 1 == layerCount);

        // Non-final layers carry one additional bias neuron.
        if (isLast) {
            myLayers.push_back(Layer(topology[layerIdx]));
        } else {
            myLayers.push_back(Layer(topology[layerIdx] + 1));
        }
        Layer& layer = myLayers[layerIdx];

        // Number the neurons 0..size-1 within the layer.
        unsigned int position = 0;
        for (auto& neuron : layer) {
            neuron.setIndex(position);
            ++position;
        }

        // Console log; the first layer is always reported as the input layer.
        cout << red;
        if (layerIdx == 0) {
            cout << "Input layer (" << layer.size() << " neurons including bias neuron) created." << endl;
            layer.back().setOutput(1);
        } else if (!isLast) {
            cout << "Hidden layer " << layerIdx << " (" << layer.size() << " neurons including bias neuron) created." << endl;
            layer.back().setOutput(1);
        } else {
            cout << "Output layer (" << layer.size() << " neurons) created." << endl;
        }
        cout << white;
    }
}
void Net::feedForward(const vector<double>& inputVals) {
assert(myLayers[0].size() - 1 == inputVals.size());
for (unsigned int i = 0; i < inputVals.size(); ++i) { // Setting input vals to input layer
cout << yellow << "Setting input vals...";
myLayers[0][i].setOutput(inputVals[i]); // myLayers[0] is the input layer
cout << "myLayer[0][" << i << "].getOutput()==" << myLayers[0][i].getOutput() << white << endl;
}
for (unsigned int i = 1; i < myLayers.size() - 1; ++i) { // Updating hidden layers
for (unsigned int j = 0; j < myLayers[i].size() - 1; ++j) { // - 1 because bias neurons do not have input
cout << "myLayers[" << i << "].size()==" << myLayers[i].size() << endl;
cout << green << "Updating neuron " << j << " inside layer " << i << white << endl;
myLayers[i][j].feedForward(myLayers[i - 1]); // Updating the neurons output based on the neurons of the previous layer
}
}
for (unsigned int i = 0; i < myLayers.back().size(); ++i) { // Updating output layer
cout << green << "Updating output neuron " << i << ": " << white << endl;
const Layer& prevLayer = myLayers[myLayers.size() - 2];
myLayers.back()[i].feedForward(prevLayer); // Updating the neurons output based on the neurons of the previous layer
}
}
// Prints each neuron of the last layer, switching the console colour to blue
// before the neuron's line and back to white after it.
void Net::printOutput(void) const {
    for (const auto& neuron : myLayers.back()) {
        cout << blue;
        neuron.printOutput();
        cout << white;
    }
}
void Net::backPropagate(const vector<double>& targetVals) {
Layer& outputLayer = myLayers.back();
assert(targetVals.size() == outputLayer.size());
cout << "good2" << endl;
// Starting with the output layer
for (unsigned int i = 0; i < outputLayer.size(); ++i) { // Traversing output layer
double output = outputLayer[i].getOutput(); cout << "good3" << endl;
double error = output * (1 - output) * (pow(targetVals[i] - output,2)); cout << "good4" << endl;
outputLayer[i].setError(error); // Calculating error
double newWeight = outputLayer[i].getWeight();
newWeight += (error * outputLayer[i].getOutput());
outputLayer[i].setWeight(newWeight); // Setting new weight
cout << "good5" << endl;
}

for (unsigned int i = myLayers.size() - 2; i > 0; --i) { // Traversing hidden layers all the way to input layer
Layer& currentLayer = myLayers[i];
Layer& nextLayer = myLayers[i + 1];
for (unsigned int j = 0; j < currentLayer.size(); ++j) { // Traversing current layer
const double& output = currentLayer[j].getOutput();
double subSum = 0.0; // Initializing subsum
for (unsigned int k = 0; k < nextLayer.size(); ++k) { // Traversing next layer
subSum += pow(nextLayer[k].getError() * currentLayer[j].getWeight(),2); // Getting their backpropagated error and weight
}
double error = output*(1 - output)*(subSum);
currentLayer[j].setError(error);
double newWeight = currentLayer[j].getWeight();
newWeight += error * output;
currentLayer[j].setWeight(newWeight);
}
}
}
}

// Builds a 2-4-1 network, trains it on two samples and prints the network's
// answer for each of them.
int main(int argc, char* argv[]) {
    (void)argc;  // command-line arguments are not used
    (void)argv;

    srand(static_cast<unsigned int>(time(nullptr)));

    vector<unsigned int> myTopology;
    myTopology.push_back(2);
    myTopology.push_back(4);
    myTopology.push_back(1);

    cout << myTopology.size() << endl << endl; // myTopology == {2, 4, 1}
    Neural::Net myNet(myTopology);

    // Alternate between the two samples on every epoch. Training on one sample
    // for all its steps and only then on the other lets the second sample undo
    // what was learned from the first. Each sample still gets 50 steps.
    for (unsigned int epoch = 0; epoch < 50; ++epoch) {
        myNet.feedForward({1, 1});
        myNet.backPropagate({0});
        myNet.feedForward({0, 0});
        myNet.backPropagate({1});
    }

    cout << "Feeding 0,0" << endl;
    myNet.feedForward({0, 0});
    myNet.printOutput();
    cout << "Feeding 1,1" << endl;
    myNet.feedForward({1, 1});
    myNet.printOutput();

    return 0;
}

2

artificial-intelligence backpropagation c++ neural-network

Решение

Другие решения

Используйте эволюционный алгоритм вместо обратного распространения для тренировки весов.

это должно помочь

1

Источник

Accepted Answer

Вы можете попробовать тренировать сеть до тех пор, пока её ошибка не станет равна 0%, но это, вероятно, займёт слишком много времени или окажется невозможным. Обычно используют минимальную ошибку 0,01 (1%) с пороговыми значениями, например: > 0,9 = 1 и < 0,1 = 0.

Чтобы вычислить ошибку сети с одним выходным нейроном, для каждого входа нужно добавить в список значение Sum(Math.Abs(idealOutput - a.Value)). Затем усредните список, чтобы получить ошибку.

Моя реализация в C #:

// Repeats training epochs until the mean absolute output error drops to
// minError or the epoch counter reaches int.MaxValue.
Network = network;

int epoch = 0;
double error = 1.0;

while (error > minError && epoch < int.MaxValue)
{
    // One entry per training sample: the summed absolute error of all output neurons.
    var sampleErrors = new List<double>();

    for (int sample = 0; sample < inputs.Count; sample++)
    {
        // Run one training step on this sample.
        Algorithm(inputs[sample], ideals[sample]);

        // Pair each neuron of the last layer with the ideal value at the same position.
        var outputLayer = Network.Layers[Network.Layers.Count - 1];
        sampleErrors.Add(
            outputLayer.Neurons
                .Select((neuron, position) => Math.Abs(ideals[sample][position] - neuron.Value))
                .Sum());
    }

    error = sampleErrors.Average();
    Console.WriteLine("Epoch: #{0} --- Error: {1}", epoch, error);
    ++epoch;
}

2