How do I correctly implement a custom loss function in Caffe?

I am new to Caffe, and I am implementing a custom loss function for it, but an error occurs when I test it.

My loss function is similar to the Euclidean loss. The original Euclidean loss equation is shown below.

$$E = \frac{1}{2N} \sum_{n=1}^{N} \left\| \hat{y}_n - y_n \right\|_2^2$$

I would like to implement a 2D distance loss, so I wrote the equation shown below, where the sum runs over the predicted (x, y) pairs and N is the batch size.

$$E = \frac{1}{N} \sum_{n=1}^{N} \left[ (\hat{x}_n - x_n)^2 + (\hat{y}_n - y_n)^2 \right]$$
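For reference, differentiating this loss with respect to one predicted pair gives (with the opposite sign for the gradient with respect to the second, label input):

$$\frac{\partial E}{\partial \hat{x}_n} = \frac{2}{N} (\hat{x}_n - x_n), \qquad \frac{\partial E}{\partial \hat{y}_n} = \frac{2}{N} (\hat{y}_n - y_n)$$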

When I run the tests, the backward function fails. I suspect that something is wrong with my backpropagation, but I am not sure what. I only modified the Euclidean loss to fit my loss function, and I wrote the gradient of my loss function in the backward pass myself. Do you know why this error occurs?

imgdist_loss_layer.cpp

#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ImgdistLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";
  diff_.ReshapeLike(*bottom[0]);
}

// forward propagation
// calculate loss
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count() / 2;

  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    const Dtype x_sub = bottom[0]->cpu_data()[2 * i] - bottom[1]->cpu_data()[2 * i];
    const Dtype y_sub = bottom[0]->cpu_data()[2 * i + 1] - bottom[1]->cpu_data()[2 * i + 1];
    loss += x_sub * x_sub + y_sub * y_sub;
  }
  loss = loss / bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;
}

// back propagation
// calculate gradient
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data_0 = bottom[0]->cpu_data();
      const Dtype* bottom_data_1 = bottom[1]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      const int count = bottom[0]->count() / 2;
      for (int j = 0; j < count; ++j) {
        const Dtype x_sub = bottom_data_0[2 * j] - bottom_data_1[2 * j];
        const Dtype y_sub = bottom_data_0[2 * j + 1] - bottom_data_1[2 * j + 1];

        const Dtype sign = (i == 0) ? 1 : -1;
        const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom[i]->num();
        const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom[i]->num();
        bottom_diff[2 * j] = top[0]->cpu_diff()[0] * alpha_0;
        bottom_diff[2 * j + 1] = top[0]->cpu_diff()[0] * alpha_1;
      }  // j
    }
  }  // i
}

#ifdef CPU_ONLY
STUB_GPU(ImgdistLossLayer);
#endif

INSTANTIATE_CLASS(ImgdistLossLayer);
REGISTER_LAYER_CLASS(ImgdistLoss);

} // namespace caffe
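As a point of comparison (not part of my layer), the stock EuclideanLossLayer in upstream Caffe computes this kind of forward pass with the BLAS helpers declared in math_functions.hpp instead of an explicit element loop. A rough sketch of that pattern, written here as a hypothetical free function rather than the real member function, using a scratch blob like the diff_ member declared in the header below:

// Sketch of the upstream EuclideanLossLayer forward pattern (paraphrased).
// EuclideanStyleForward is a hypothetical name; upstream, this logic lives
// in EuclideanLossLayer<Dtype>::Forward_cpu.
template <typename Dtype>
void EuclideanStyleForward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top, Blob<Dtype>& diff) {
  const int count = bottom[0]->count();
  // diff = bottom[0] - bottom[1], one vectorized call over the whole blob
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff.mutable_cpu_data());
  // dot(diff, diff) is the sum of squared differences
  Dtype dot = caffe_cpu_dot(count, diff.cpu_data(), diff.cpu_data());
  top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
}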

imgdist_loss_layer.cu

#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// forward propagation loop: one thread per (x, y) pair
template <typename Dtype>
__global__ void imgdistLossForwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* loss) {
  CUDA_KERNEL_LOOP(i, nthreads) {
    loss[i] = (input_data[2 * i] - target[2 * i])
        * (input_data[2 * i] - target[2 * i])
        + (input_data[2 * i + 1] - target[2 * i + 1])
        * (input_data[2 * i + 1] - target[2 * i + 1]);
  }
}

// forward propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count() / 2;

  const Dtype* input_data = bottom[0]->gpu_data();
  const Dtype* target = bottom[1]->gpu_data();
  // the diff buffer of bottom[0] is reused as scratch space for the
  // per-pair losses before the reduction
  Dtype* loss_data = bottom[0]->mutable_gpu_diff();

  imgdistLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, input_data, target, loss_data);
  CUDA_POST_KERNEL_CHECK;

  Dtype loss;
  caffe_gpu_asum(count, loss_data, &loss);
  loss = loss / bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;
}

// back propagation loop
template <typename Dtype>
__global__ void imgdistLossBackwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* diff,
    const Dtype sign, const Dtype toploss, const Dtype bottom_num) {
  CUDA_KERNEL_LOOP(i, nthreads) {
    const Dtype x_sub = input_data[2 * i] - target[2 * i];
    const Dtype y_sub = input_data[2 * i + 1] - target[2 * i + 1];
    const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom_num;
    const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom_num;

    diff[2 * i] = toploss * alpha_0;
    diff[2 * i + 1] = toploss * alpha_1;
  }
}

// back propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;
      const int count = bottom[0]->count() / 2;
      const Dtype* input_data = bottom[0]->gpu_data();
      const Dtype* target = bottom[1]->gpu_data();
      const Dtype toploss = top[0]->cpu_diff()[0];
      const Dtype bottom_num = bottom[i]->num();
      Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
      imgdistLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
          count, input_data, target, bottom_diff, sign, toploss, bottom_num);
      CUDA_POST_KERNEL_CHECK;
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ImgdistLossLayer);

}  // namespace caffe
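The upstream GPU forward has a matching pattern: a device-side subtract plus a dot product, with no custom kernel and no asum reduction. Note that caffe_gpu_asum sums absolute values, which equals the intended sum above only because each per-pair loss is a square and therefore non-negative. A rough sketch of the upstream pattern, again as a hypothetical free function:

// Sketch of the upstream EuclideanLossLayer GPU forward pattern (paraphrased).
template <typename Dtype>
void EuclideanStyleForwardGPU(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top, Blob<Dtype>& diff) {
  const int count = bottom[0]->count();
  // diff = bottom[0] - bottom[1] on the device
  caffe_gpu_sub(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
      diff.mutable_gpu_data());
  // sum of squares via a device dot product
  Dtype dot;
  caffe_gpu_dot(count, diff.gpu_data(), diff.gpu_data(), &dot);
  top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
}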

imgdist_loss_layer.hpp (only the class name was changed)

#ifndef CAFFE_IMGDIST_LOSS_LAYER_HPP_
#define CAFFE_IMGDIST_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/loss_layer.hpp"

namespace caffe {

template <typename Dtype>
class ImgdistLossLayer : public LossLayer<Dtype> {
 public:
  explicit ImgdistLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ImgdistLoss"; }

  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  /// @copydoc EuclideanLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Blob<Dtype> diff_;
};

}  // namespace caffe

#endif  // CAFFE_IMGDIST_LOSS_LAYER_HPP_

test_imgdist_loss_layer.cpp

#include <cmath>
#include <vector>

#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layers/imgdist_loss_layer.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

namespace caffe {

template <typename TypeParam>
class ImgdistLossLayerTest : public MultiDeviceTest<TypeParam> {
  typedef typename TypeParam::Dtype Dtype;

 protected:
  ImgdistLossLayerTest()
      : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
        blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
        blob_top_loss_(new Blob<Dtype>()) {
    // fill the values
    FillerParameter filler_param;
    GaussianFiller<Dtype> filler(filler_param);
    filler.Fill(this->blob_bottom_data_);
    blob_bottom_vec_.push_back(blob_bottom_data_);
    filler.Fill(this->blob_bottom_label_);
    blob_bottom_vec_.push_back(blob_bottom_label_);
    blob_top_vec_.push_back(blob_top_loss_);
  }
  virtual ~ImgdistLossLayerTest() {
    delete blob_bottom_data_;
    delete blob_bottom_label_;
    delete blob_top_loss_;
  }

  void TestForward() {
    // Get the loss without a specified objective weight -- should be
    // equivalent to explicitly specifying a weight of 1.
    LayerParameter layer_param;
    ImgdistLossLayer<Dtype> layer_weight_1(layer_param);
    layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype loss_weight_1 =
        layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);

    // Get the loss again with a different objective weight; check that it is
    // scaled appropriately.
    const Dtype kLossWeight = 3.7;
    layer_param.add_loss_weight(kLossWeight);
    ImgdistLossLayer<Dtype> layer_weight_2(layer_param);
    layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype loss_weight_2 =
        layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype kErrorMargin = 1e-5;
    EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
    // Make sure the loss is non-trivial.
    const Dtype kNonTrivialAbsThresh = 1e-1;
    EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
  }

  Blob<Dtype>* const blob_bottom_data_;
  Blob<Dtype>* const blob_bottom_label_;
  Blob<Dtype>* const blob_top_loss_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(ImgdistLossLayerTest, TestDtypesAndDevices);

TYPED_TEST(ImgdistLossLayerTest, TestForward) {
  this->TestForward();
}

TYPED_TEST(ImgdistLossLayerTest, TestGradient) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  const Dtype kLossWeight = 3.7;
  layer_param.add_loss_weight(kLossWeight);
  ImgdistLossLayer<Dtype> layer(layer_param);
  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
      this->blob_top_vec_);
}

}  // namespace caffe
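For reference, the GradientChecker used above perturbs each input element by the step size passed to its constructor (1e-2 here) and compares the analytic gradient produced by Backward() against a central-difference estimate:

$$\hat{g}_i = \frac{E(x_i + \varepsilon) - E(x_i - \varepsilon)}{2\varepsilon}, \qquad \varepsilon = 10^{-2}$$

The objective+ and objective- values in the log are the two perturbed losses in this formula.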

The error log is below.

    C:\Projects\caffe\include\caffe/test/test_gradient_check_util.hpp(175): error: The difference between computed_gradient and estimated_gradient is 1.5981258813447825, which exceeds threshold_ * scale, where
computed_gradient evaluates to 2.755687472811343,
estimated_gradient evaluates to 1.1575615914665605, and
threshold_ * scale evaluates to 0.027556874728113429.
debug: (top_id, top_data_id, blob_id, feat_id)=0,0,1,49; feat = 1.5097962694948988; objective+ = 20.508002455868997; objective- = 20.484851224039666
[  FAILED  ] ImgdistLossLayerTest/3.TestGradient, where TypeParam = struct caffe::GPUDevice<double> (204 ms)
[----------] 2 tests from ImgdistLossLayerTest/3 (222 ms total)

[----------] Global test environment tear-down
[==========] 8 tests from 4 test cases ran. (878 ms total)
[  PASSED  ] 4 tests.
[  FAILED  ] 4 tests, listed below:
[  FAILED  ] ImgdistLossLayerTest/0.TestGradient, where TypeParam = struct caffe::CPUDevice<float>
[  FAILED  ] ImgdistLossLayerTest/1.TestGradient, where TypeParam = struct caffe::CPUDevice<double>
[  FAILED  ] ImgdistLossLayerTest/2.TestGradient, where TypeParam = struct caffe::GPUDevice<float>
[  FAILED  ] ImgdistLossLayerTest/3.TestGradient, where TypeParam = struct caffe::GPUDevice<double>

4 FAILED TESTS


Solution

The problem has not been solved yet.

Other solutions

There are no other solutions yet …
