Как сохранить скрытые состояния LSTM при потоковой передаче данных через модель в C++ API для TensorFlow?

Question

Как сохранить скрытые состояния LSTM при потоковой передаче данных через модель в C++ API для TensorFlow?

Я обучил модель LSTM в Keras и теперь хочу развернуть её через C++. Я преобразовал файл модели .hdf5 в файл .pb.

Когда я передаю данные через модель в C++ (по одному временному шагу за раз), скрытое состояние модели сбрасывается на каждом временном шаге. Я убедился в этом, несколько раз передав через модель один и тот же входной вектор (инициализированный случайными значениями) и каждый раз получая один и тот же выход.

Как я могу сохранить скрытое состояние между вызовами session->Run(…)?

Архитектура модели проста: 3 полносвязных (Dense) слоя, за которыми следует слой LSTM.
Я буду использовать модель в потоковом режиме, то есть передавать по одному временному шагу за раз по мере получения данных для каждого шага. В Keras я сделал слой LSTM сохраняющим состояние (stateful) и обучал модель с batch_size, равным 1. При тестировании модели в Python потоковая передача работает нормально.
Если же я накапливаю несколько временных шагов и передаю их модели в C++ как единый тензор, всё работает нормально: скрытое состояние обновляется и сохраняется между временными шагами внутри входного тензора.

Код C++

#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/public/session.h"
// File-wide TensorFlow session: main() creates it and _decodeLSTM() runs the
// graph through it.
std::unique_ptr<tensorflow::Session> session;

// Draws one value from rand() and scales it by (RAND_MAX + 1), giving a
// pseudo-random number between 0 and 1. The division is carried out in double
// precision and the quotient is then converted to float.
float floatRand()
{
    const float draw = float(rand());
    const double span = float(RAND_MAX) + 1.0;
    return static_cast<float>(draw / span);
}

// loads model from .pb file
bool _loadModel(const std::string& model_file_name,
std::unique_ptr<tensorflow::Session>& session)
{
tensorflow::GraphDef graph_def;
tensorflow::Status load_model_status = ReadBinaryProto(tensorflow::Env::Default(), model_file_name, &graph_def);
if (!load_model_status.ok())
{
return false;
}
tensorflow::Status session_create_status = session->Create(graph_def);
if (!session_create_status.ok())
{
return false;
}
return true;
}

// decodes LSTM model
void _decodeLSTM(std::vector<std::vector<float>>& in_vec)
{
auto sequence_length = in_vec.size();
auto input_dim = in_vec[0].size();
tensorflow::Tensor inputs(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, sequence_length, input_dim}));
auto input_tensor_mapped = inputs.tensor<float, 3>();

// copying the data into the corresponding tensor
for (int s = 0; s < sequence_length; s++)
{
for (int i = 0; i < input_dim; i++)
{
input_tensor_mapped(0, s, i) = in_vec[s][i];
}
}

std::cout << inputs.shape() << std::endl ;
std::cout << inputs.DebugString() << std::endl ;

std::string input_layer = "input";
std::string output_layer = "duration_0";
std::vector<tensorflow::Tensor> outputs;
tensorflow::Status run_status = session->Run({{input_layer, inputs}},
{output_layer}, {}, &outputs);
if (!run_status.ok())
{
std::cout<< "decodeLSTM: Failed to decode LSTM correctly!" << std::endl;
return;
}

std::cout << "Decode success" << std::endl;
std::cout << outputs[0].DebugString() << std::endl;
}

int main()
{
std::filesystem::path dur_model_path = std::filesystem::path("/media//work_dir/mymodel.pb");
session.reset(tensorflow::NewSession(tensorflow::SessionOptions()));

if (_loadModel(dur_model_path, session))
{
std::cout << "Model loaded succesfully" << std::endl;
} else {
std::cout << "Failed to load model" << std::endl;
}

// creating dummy seqeunce
std::vector<float> input_feat(657, 0.0);
for (auto &val : input_feat)
{
val = floatRand();
}

std::vector<std::vector<float>> input_seq(3, input_feat);
// only first timestep
std::vector<std::vector<float>> seqA(1);
seqA[0] = input_seq[0];
// first two timesteps
std::vector<std::vector<float>> seqB(2);
seqB[0] = input_seq[0];
seqB[1] = input_seq[1];
// all three timesteps
std::vector<std::vector<float>> seqC(3);
seqC[0] = input_seq[0];
seqC[1] = input_seq[1];
seqC[2] = input_seq[2];

std::cout << "Single timesteps :" << std::endl ;
// same sequence passed multiple times
// expecting output to change each time as
// hidden state gets updated, but not the case
std::cout << "First Pass :" << std::endl
_decodeLSTM(seqA);
std::cout << "Second Pass :" << std::endl
_decodeLSTM(seqA);
std::cout << "Third Pass :" << std::endl
_decodeLSTM(seqA);

std::cout << "Stacked timesteps :" << std::endl ;
// same sequence passed multiple times
// expecting output to change each time as
// hidden state gets updated, but not the case
std::cout << "First Pass :" << std::endl
_decodeLSTM(seqA);
std::cout << "Second Pass :" << std::endl
_decodeLSTM(seqB);
std::cout << "Third Pass :" << std::endl
_decodeLSTM(seqC);
}

Функция Python для преобразования файла модели hdf5 в файл модели pb

import os
import os.path as osp
import tensorflow as tf
from keras.models import load_model
from keras.models import model_from_json
from keras import backend as K

def convertGraph(modelPath, outdir, numoutputs, prefix, name, json):
    '''
    Converts a Keras model to a binary .pb file for use with Tensorflow.

    Args:
        modelPath (str): path to the .hdf5 file; when ``json`` is truthy it is
            instead the common prefix of ``<modelPath>.json`` (architecture)
            and ``<modelPath>.h5`` (weights)
        outdir (str): path to the output directory (created if missing)
        numoutputs (int): number of model outputs
        prefix (str): the prefix of the output aliasing; output ``i`` is
            exposed as the graph node ``<prefix>_<i>``
        name (str): file name of the binary .pb written inside ``outdir``
        json (bool): load the architecture from JSON and the weights from H5
            instead of loading a single model file
    Returns:
        None
    '''

    # makedirs (unlike mkdir) also creates missing parent directories.
    # NOTE: If using Python > 3.2, this could be replaced with os.makedirs( name, exist_ok=True )
    if not osp.isdir(outdir):
        os.makedirs(outdir)

    K.set_learning_phase(0)

    # load the model
    print("Loading from " + modelPath)
    if json:
        # "with" closes the file even if reading raises
        with open(modelPath + ".json", 'r') as json_file:
            loaded_model_json = json_file.read()
        net_model = model_from_json(loaded_model_json)
        net_model.load_weights(modelPath + ".h5")
    else:
        net_model = load_model(modelPath)

    # Alias the outputs in the model - this sometimes makes them easier to access in TF
    # `outputs` is always a list; `output` is a bare tensor for single-output
    # models, where indexing it would slice the tensor instead of selecting it.
    pred_node_names = [prefix + '_' + str(i) for i in range(numoutputs)]
    for i, node_name in enumerate(pred_node_names):
        tf.identity(net_model.outputs[i], name=node_name)
    print('Output nodes names are: ', pred_node_names)

    sess = K.get_session()

    # Write the graph in human readable
    f = 'graph_def_for_reference.pb.ascii'
    tf.train.write_graph(sess.graph.as_graph_def(), outdir, f, as_text=True)
    print('Saved the graph definition in ascii format at: ', osp.join(outdir, f))

    # Write the graph in binary .pb file
    # NOTE(review): convert_variables_to_constants turns the graph's variables
    # into constants. For a stateful LSTM this presumably freezes the state
    # variables as well, so the exported graph could not carry hidden state
    # between separate Session::Run calls - confirm, and if so expose the
    # states as explicit graph inputs/outputs instead.
    from tensorflow.python.framework import graph_util
    from tensorflow.python.framework import graph_io
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), pred_node_names)
    graph_io.write_graph(constant_graph, outdir, name, as_text=False)
    print('Saved the constant graph (ready for inference) at: ', osp.join(outdir, name))

Я ожидал, что скрытое состояние слоёв LSTM будет автоматически обновляться и сохраняться после каждой передачи данных через модель. Так происходит в Python, но не в C++: каждый вызов session->Run(…) использует начальное значение скрытого состояния.

0

c++inference lstm recurrent-neural-network tensorflow

Решение

Задача ещё не решена.

Другие решения

Других решений пока нет …

Источник