I have a simple TensorFlow model written in Python that trains without any problems. Here is the model I am using:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import random
import numpy as np

NUM_CLASSES = 102
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 25
NUM_CHANNELS = 3
LEARNING_RATE = 0.0001

with tf.Session() as sess:
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(BATCH_SIZE, IMAGE_HEIGHT,
                                               IMAGE_WIDTH, NUM_CHANNELS),
                                        name="input")
    labels_placeholder = tf.placeholder(tf.float32, shape=(BATCH_SIZE),
                                        name="label")

    with tf.name_scope("conv1_1") as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64],
                             dtype=tf.float32, stddev=1e-2), name="weights")
        conv = tf.nn.conv2d(images_placeholder, kernel, [1, 1, 1, 1],
                            padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        conv1_1 = tf.nn.relu(out, name=scope)

    pool1 = tf.nn.max_pool(conv1_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')

    with tf.name_scope('conv2_1') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128],
                             dtype=tf.float32, stddev=1e-2), name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        conv2_1 = tf.nn.relu(out, name=scope)

    pool2 = tf.nn.max_pool(conv2_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool2')

    with tf.name_scope('conv3_1') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 256],
                             dtype=tf.float32, stddev=1e-2), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        conv3_1 = tf.nn.relu(out, name=scope)

    pool3 = tf.nn.max_pool(conv3_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool3')

    with tf.name_scope('conv4_1') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512],
                             dtype=tf.float32, stddev=1e-2), name='weights')
        conv = tf.nn.conv2d(pool3, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        conv4_1 = tf.nn.relu(out, name=scope)

    pool4 = tf.nn.max_pool(conv4_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool4')

    with tf.name_scope('mentee_conv5_1') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                             dtype=tf.float32, stddev=1e-2), name='weights')
        conv = tf.nn.conv2d(pool4, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        conv5_1 = tf.nn.relu(out, name=scope)

    pool5 = tf.nn.max_pool(conv5_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool5')

    with tf.name_scope('fc1') as scope:
        shape = int(np.prod(pool5.get_shape()[1:]))
        fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
                           dtype=tf.float32, stddev=1e-2), name='weights')
        fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                           trainable=True, name='biases')
        pool5_flat = tf.reshape(pool5, [-1, shape])
        fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
        fc1 = tf.nn.relu(fc1l)
        fc1 = tf.nn.dropout(fc1, 0.5)

    labels = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=fc1, name="xentropy")
    loss = tf.reduce_mean(cross_entropy, name='loss')

    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step, name="train")

    init = tf.initialize_variables(tf.all_variables(), name='init_all_vars_op')
    tf.train.write_graph(sess.graph_def, "models/", "graph.pb", as_text=False)
I export the model to a protobuf file and then load it from my C++ code, so that I can train the model through the C++ API instead of Python. Unfortunately, I get the following error on SparseSoftmaxCrossEntropy:
E tensorflow/core/common_runtime/executor.cc:594] Executor failed to create kernel. Invalid argument: NodeDef mentions attr 'message' not in Op<name=PreventGradient; signature=input:T -> output:T; attr=T:type>; NodeDef: gradients/xentropy/xentropy_grad/PreventGradient = PreventGradient[T=DT_FLOAT, message="Currently there is no way to take the second derivative of sparse_softmax_cross_entropy_with_logits due to the fused implementation\'s interaction with tf.gradients()", _device="/job:localhost/replica:0/task:0/cpu:0"](xentropy/xentropy:1)
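For reference, the C++ side does nothing unusual; it follows the standard load-and-run pattern of the TensorFlow C++ API. A minimal sketch of that pattern (the node names "input", "label", "train" and "init_all_vars_op" come from the Python export above; filling the batch tensors with real data is omitted):

// Sketch: load models/graph.pb and run one training step via the C++ API.
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"

int main() {
  tensorflow::Session* session;
  TF_CHECK_OK(tensorflow::NewSession(tensorflow::SessionOptions(), &session));

  // Load the graph definition written by tf.train.write_graph.
  tensorflow::GraphDef graph_def;
  TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(),
                                          "models/graph.pb", &graph_def));
  TF_CHECK_OK(session->Create(graph_def));

  // Run the variable initializer once.
  TF_CHECK_OK(session->Run({}, {}, {"init_all_vars_op"}, nullptr));

  // One training step: feed a batch and execute the "train" target node.
  tensorflow::Tensor images(tensorflow::DT_FLOAT,
                            tensorflow::TensorShape({25, 224, 224, 3}));
  tensorflow::Tensor labels(tensorflow::DT_FLOAT,
                            tensorflow::TensorShape({25}));
  // ... fill images and labels with a batch of data ...
  TF_CHECK_OK(session->Run({{"input", images}, {"label", labels}},
                           {}, {"train"}, nullptr));

  session->Close();
  delete session;
  return 0;
}

The error above is raised during the second Run call, when the executor tries to instantiate kernels for the nodes reachable from "train".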
I checked the TensorFlow source code and found that the gradient computation for this op is not implemented. But then I wonder: how does the Python version train without any problems, while in C++ it tries to compute the gradient?
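One detail worth noting: optimizer.minimize() builds the backward pass at graph-construction time, so graph.pb already contains the gradient nodes, including the gradients/xentropy/xentropy_grad/PreventGradient node named in the error. The C++ runtime is therefore not deriving gradients itself; it is executing nodes that were baked in at export. A quick sketch of how one could confirm this by scanning the loaded GraphDef (a hypothetical helper, not part of any TensorFlow API):

// Sketch: list the PreventGradient nodes that minimize() baked into
// the exported graph, using the graph_def loaded in the snippet above.
#include <iostream>
#include "tensorflow/core/framework/graph.pb.h"

void DumpPreventGradientNodes(const tensorflow::GraphDef& graph_def) {
  for (const tensorflow::NodeDef& node : graph_def.node()) {
    // PreventGradient guards the fused sparse-softmax implementation
    // against second derivatives, as the error message explains.
    if (node.op() == "PreventGradient") {
      std::cout << node.name() << " : " << node.op() << std::endl;
    }
  }
}

If that node is present in graph.pb, then an "attr 'message' not in Op" failure would be consistent with a version skew: a graph produced by a Python build whose PreventGradient op has a message attribute, executed by a C++ binary whose op registry predates that attribute. This is only a hypothesis, though.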