машинное обучение — изменение кода предсказания Caffe C ++ для нескольких входов

Question

машинное обучение — изменение кода предсказания Caffe C ++ для нескольких входов

Я реализовал модифицированную версию Пример Caffe C ++ и хотя он работает очень хорошо, он невероятно медленный, потому что он принимает изображения только один за другим. В идеале я хотел бы передать Кафе вектор из 200 изображений и вернуть лучший прогноз для каждого. Я получил немного большая помощь от Fanglin Wang и выполнил некоторые из его рекомендаций, но мне все еще не удается решить, как извлечь наилучший результат из каждого изображения.

Методу Classify теперь передан вектор cv::Mat объекты (переменные input_channels), который представляет собой вектор изображений с плавающей точкой в оттенках серого. Я исключил метод предварительной обработки в коде, потому что мне не нужно преобразовывать эти изображения в число с плавающей запятой или вычитать среднее изображение. Я также пытался избавиться от N переменная, потому что я хочу вернуть только прогноз и вероятность для каждого изображения.

#include "Classifier.h"using namespace caffe;
using std::string;

Classifier::Classifier(const string& model_file, const string& trained_file, const string& label_file) {
#ifdef CPU_ONLY
Caffe::set_mode(Caffe::CPU);
#else
Caffe::set_mode(Caffe::GPU);
#endif

/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);

Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

/* Load labels. */
std::ifstream labels(label_file.c_str());
CHECK(labels) << "Unable to open labels file " << label_file;
string line;
while (std::getline(labels, line))
labels_.push_back(string(line));

Blob<float>* output_layer = net_->output_blobs()[0];
CHECK_EQ(labels_.size(), output_layer->channels())
<< "Number of labels is different from the output layer dimension.";
}

static bool PairCompare(const std::pair<float, int>& lhs, const std::pair<float, int>& rhs) {
return lhs.first > rhs.first;
}

/* Return the indices of the top N values of vector v. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
std::vector<std::pair<float, int> > pairs;
for (size_t i = 0; i < v.size(); ++i)
pairs.push_back(std::make_pair(v[i], i));
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end(), PairCompare);

std::vector<int> result;
for (int i = 0; i < N; ++i)
result.push_back(pairs[i].second);
return result;
}

/* Return the top N predictions. */
std::vector<Prediction> Classifier::Classify(const std::vector<cv::Mat> &input_channels) {
std::vector<float> output = Predict(input_channels);

std::vector<int> maxN = Argmax(output, 1);
int idx = maxN[0];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
return predictions;
}

std::vector<float> Classifier::Predict(const std::vector<cv::Mat> &input_channels, int num_images) {
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(num_images, num_channels_,
input_geometry_.height, input_geometry_.width);
/* Forward dimension change to all layers. */
net_->Reshape();

WrapInputLayer(&input_channels);

net_->ForwardPrefilled();

/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
const float* begin = output_layer->cpu_data();
const float* end = begin + num_images * output_layer->channels();
return std::vector<float>(begin, end);
}

/* Wrap the input layer of the network in separate cv::Mat objects (one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
Blob<float>* input_layer = net_->input_blobs()[0];

int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels() * num_images; ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}

ОБНОВИТЬ

Большое спасибо за вашу помощь, Шай, я внес изменения, которые вы порекомендовали, но, похоже, у меня возникают странные проблемы с компиляцией, которые я не могу решить (мне удалось разобраться с некоторыми проблемами).

Это изменения, которые я сделал:

Заголовочный файл:

#ifndef __CLASSIFIER_H__
#define __CLASSIFIER_H__

#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>using namespace caffe;  // NOLINT(build/namespaces)
using std::string;

/* Pair (label, confidence) representing a prediction. */
typedef std::pair<string, float> Prediction;

class Classifier {
public:
Classifier(const string& model_file,
const string& trained_file,
const string& label_file);

std::vector< std::pair<int,float> > Classify(const std::vector<cv::Mat>& img);

private:

std::vector< std::vector<float> > Predict(const std::vector<cv::Mat>& img, int nImages);

void WrapInputLayer(std::vector<cv::Mat>* input_channels, int nImages);

void Preprocess(const std::vector<cv::Mat>& img,
std::vector<cv::Mat>* input_channels, int nImages);

private:
shared_ptr<Net<float> > net_;
cv::Size input_geometry_;
int num_channels_;
std::vector<string> labels_;
};

#endif /* __CLASSIFIER_H__ */

Файл класса:

#define CPU_ONLY
#include "Classifier.h"
using namespace caffe;  // NOLINT(build/namespaces)
using std::string;

Classifier::Classifier(const string& model_file,
const string& trained_file,
const string& label_file) {
#ifdef CPU_ONLY
Caffe::set_mode(Caffe::CPU);
#else
Caffe::set_mode(Caffe::GPU);
#endif

/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);

CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<< "Input layer should have 1 or 3 channels.";
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

/* Load labels. */
std::ifstream labels(label_file.c_str());
CHECK(labels) << "Unable to open labels file " << label_file;
string line;
while (std::getline(labels, line))
labels_.push_back(string(line));

Blob<float>* output_layer = net_->output_blobs()[0];
CHECK_EQ(labels_.size(), output_layer->channels())
<< "Number of labels is different from the output layer dimension.";
}

static bool PairCompare(const std::pair<float, int>& lhs,
const std::pair<float, int>& rhs) {
return lhs.first > rhs.first;
}

/* Return the indices of the top N values of vector v. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
std::vector<std::pair<float, int> > pairs;
for (size_t i = 0; i < v.size(); ++i)
pairs.push_back(std::make_pair(v[i], i));
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end(), PairCompare);

std::vector<int> result;
for (int i = 0; i < N; ++i)
result.push_back(pairs[i].second);
return result;
}

std::vector< std::pair<int,float> > Classifier::Classify(const std::vector<cv::Mat>& img) {
std::vector< std::vector<float> > output = Predict(img, img.size());

std::vector< std::pair<int,float> > predictions;
for ( int i = 0 ; i < output.size(); i++ ) {
std::vector<int> maxN = Argmax(output[i], 1);
int idx = maxN[0];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
}
return predictions;
}

std::vector< std::vector<float> > Classifier::Predict(const std::vector<cv::Mat>& img, int nImages) {
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(nImages, num_channels_,
input_geometry_.height, input_geometry_.width);
/* Forward dimension change to all layers. */
net_->Reshape();

std::vector<cv::Mat> input_channels;
WrapInputLayer(&input_channels, nImages);

Preprocess(img, &input_channels, nImages);

net_->ForwardPrefilled();

/* Copy the output layer to a std::vector */

Blob<float>* output_layer = net_->output_blobs()[0];
std::vector <std::vector<float> > ret;
for (int i = 0; i < nImages; i++) {
const float* begin = output_layer->cpu_data() + i*output_layer->channels();
const float* end = begin + output_layer->channels();
ret.push_back( std::vector<float>(begin, end) );
}
return ret;
}

/* Wrap the input layer of the network in separate cv::Mat objects
* (one per channel). This way we save one memcpy operation and we
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels, int nImages) {
Blob<float>* input_layer = net_->input_blobs()[0];

int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels()* nImages; ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}

void Classifier::Preprocess(const std::vector<cv::Mat>& img,
std::vector<cv::Mat>* input_channels, int nImages) {
for (int i = 0; i < nImages; i++) {
vector<cv::Mat> channels;
cv::split(img[i], channels);
for (int j = 0; j < channels.size(); j++){
channels[j].copyTo((*input_channels)[i*num_channels_[0]+j]);
}
}
}

12

c++caffe deep-learning machine-learning neural-network

Решение

Другие решения

К сожалению, я не верю, что было выполнено распараллеливание сетевых проходов. Тем не менее, если вы хотите, чтобы вы могли просто реализовать свою собственную оболочку для многократного параллельного запуска данных через копии вашей сети?

Посмотри на Сколько изображений вы можете передать Кафе одновременно?

В связанном прототипе все, что вам нужно определить, это

input_shape {
dim: 64 // num of images
dim: 1
dim: 28 // height
dim: 28 // width
}

Существующая реализация оценивает пакет из 64 изображений, но не обязательно параллельно. Однако при работе на графическом процессоре обработка пакета из 64 будет быстрее, чем 64 пакета из одного изображения.

4

Источник

Accepted Answer

Если я правильно понимаю вашу проблему, вы вводите n образы, ожидающие n пара (label, prob), но получаю только одну такую пару.

Я считаю, что эти модификации должны помочь вам:

Classifier::Predict должен вернуть vector< vector<float> >, это вектор вероятностей на входное изображение. Это vector размера n векторов размера output_layer->channels():

std::vector< std::vecot<float> >
Classifier::Predict(const std::vector<cv::Mat> &input_channels,
int num_images) {
// same code here...

/* changes here: Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
std::vector< std::vector<float> > ret;
for ( int i = 0 ; i < num_images ; i++ ) {
const float* begin = output_layer->cpu_data() + i*output_layer->channels();
const float* end = begin + output_layer->channels();
ret.push_back( std::vector<float>(begin, end) );
}
return ret;
}

В Classifier::Classify вам нужно обработать каждый vector<float> через Argmax independantly:

 std::vector< std::pair<int,float> >
Classifier::Classify(const std::vector<cv::Mat> &input_channels) {

std::vector< std::vector<float> > output = Predict(input_channels);

std::vector< std::pair<int,float> > predictions;
for ( int i = 0 ; i < output.size(); i++ ) {
std::vector<int> maxN = Argmax(output[i], 1);
int idx = maxN[0];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
}
return predictions;
}

10