Forward pass in LSTM network learned by Keras


Problem description

I have the following code, from which I am hoping to obtain the forward pass of a 2-layer LSTM:

"""
this is a simple numerical example of an LSTM forward pass, to allow deep understanding
the LSTM is trying to learn the sin function (angles in degrees) by predicting the next value after a sequence of 3 inputs
    example 1: {0.583, 0.633, 0.681} --> {0.725}, these values correspond to
               {sin(35.66), sin(39.27), sin(42.92)} --> {sin(46.47)}
    example 2: {0.725, 0.767, 0.801} --> {0.849}, these values correspond to
               {sin(46.47), sin(50.09), sin(53.23)} --> {sin(58.10)}

example tested:  [[['0.725323664']
                   ['0.7671179']
                   ['0.805884672']]]
predicted_instance:  [ 0.83467698]


training example pair:  [['0.680666907']
 ['0.725323664']
 ['0.7671179']] 0.805884672

"""
import numpy as np
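
# architecture, as built by the Keras code further below:
#   input (batch, 3, 1) -> LSTM(3 units, return_sequences=True) -> LSTM(1 unit) -> Dense(1, linear)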


# linear activation matrix-wise (works also element-wise)
def linear(x):
    return x


# sigmoid function matrix-wise (works also element-wise)
def sigmoid(x):
    return 1/(1 + np.exp(-x))


# hard sigmoid function, element-wise
def hard_sig(x):
    # Keras' hard_sigmoid for both the TensorFlow and Theano backends:
    # max(0, min(1, 0.2 * x + 0.5))
    return np.max(np.array([0.0, np.min(np.array([1.0, x * 0.2 + 0.5]))]))
    # Courbariaux et al. 2016 (Binarized Neural Networks) use (x + 1) / 2 instead:
    # return np.max(np.array([0.0, np.min(np.array([1.0, (x + 1.0)/2.0]))]))


# hard sigmoid function matrix wise
def hard_sigmoid(x, fun=hard_sig):
    return np.vectorize(fun)(x)


# hyperbolic tangent function matrix wise (works also element-wise)
def hyperbolic_tangent(x):
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))


# sanity checks on the activations; expected (up to float precision):
print(sigmoid(np.array([-100, 0, 100])))             # [0.  0.5  1.]
print(hard_sigmoid(np.array([-100, 0, 0.1, 100])))   # [0.  0.5  0.52  1.]
print(hyperbolic_tangent(np.array([-100, 0, 100])))  # [-1.  0.  1.]

parameter_names = ['lstm_1_kernel_0.npy',
                   'lstm_1_recurrent_kernel_0.npy',
                   'lstm_1_bias_0.npy',
                   'lstm_2_kernel_0.npy',
                   'lstm_2_recurrent_kernel_0.npy',
                   'lstm_2_bias_0.npy',
                   'dense_1_kernel_0.npy',
                   'dense_1_bias_0.npy']


# LSTM 1 Weights
lstm_1_kernel_0 = np.load('lstm_1_kernel_0.npy')
print('lstm_1_kernel_0: ', lstm_1_kernel_0.shape)
lstm_1_recurrent_kernel_0 = np.load('lstm_1_recurrent_kernel_0.npy')
print('lstm_1_recurrent_kernel_0: ', lstm_1_recurrent_kernel_0.shape)
lstm_1_bias_0 = np.load('lstm_1_bias_0.npy')
print('lstm_1_bias_0: ', lstm_1_bias_0.shape)

# LSTM 2 Weights
lstm_2_kernel_0 = np.load('lstm_2_kernel_0.npy')
print('lstm_2_kernel_0: ', lstm_2_kernel_0.shape)
lstm_2_recurrent_kernel_0 = np.load('lstm_2_recurrent_kernel_0.npy')
print('lstm_2_recurrent_kernel_0: ', lstm_2_recurrent_kernel_0.shape)
lstm_2_bias_0 = np.load('lstm_2_bias_0.npy')
print('lstm_2_bias_0: ', lstm_2_bias_0.shape)

# Dense layer
dense_1_kernel_0 = np.load('dense_1_kernel_0.npy')
print('dense_1_kernel_0: ', dense_1_kernel_0.shape)
dense_1_bias_0 = np.load('dense_1_bias_0.npy')
print('dense_1_bias_0: ', dense_1_bias_0.shape)

time_seq = [0, 1, 2]
"""
input_seq = np.array([[[0.725323664],
                       [0.7671179],
                       [0.805884672]]])
"""
input_seq = np.array([[[0.680666907],
                       [0.725323664],
                       [0.7671179]]])
print('input_seq: ', input_seq.shape)
for time in time_seq:
    print('input t', time, ':', input_seq[0, time, 0])

"""
# z0 = z[:, :self.units]
# z1 = z[:, self.units: 2 * self.units]
# z2 = z[:, 2 * self.units: 3 * self.units]
# z3 = z[:, 3 * self.units:]

# i = self.recurrent_activation(z0)
# f = self.recurrent_activation(z1)
# c = f * c_tm1 + i * self.activation(z2)
# o = self.recurrent_activation(z3)

# activation = 'tanh'
# recurrent_activation = 'hard_sigmoid'
"""


# LSTM 1
x_1_lstm_1 = input_seq[0, 0, 0]
print('x_1: ', x_1_lstm_1)
x_2_lstm_1 = input_seq[0, 1, 0]
print('x_2: ', x_2_lstm_1)
x_3_lstm_1 = input_seq[0, 2, 0]
print('x_3: ', x_3_lstm_1)

c_0_lstm_1 = np.zeros((1, 3))
h_0_lstm_1 = np.zeros((1, 3))
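# LSTM 1 has 3 units, so cell state c and hidden state h have shape (1, 3);
# both start at zero, matching Keras' default (non-stateful) initialization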

z_1_lstm_1 = np.dot(x_1_lstm_1, lstm_1_kernel_0) + np.dot(h_0_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0
print(z_1_lstm_1.shape)
i_1_lstm_1 = sigmoid(z_1_lstm_1[:, 0:3])
f_1_lstm_1 = sigmoid(z_1_lstm_1[:, 3:6])
input_to_c_1_lstm_1 = z_1_lstm_1[:, 6:9]
o_1_lstm_1 = sigmoid(z_1_lstm_1[:, 9:12])
c_1_lstm_1 = np.multiply(f_1_lstm_1, c_0_lstm_1) + np.multiply(i_1_lstm_1, hyperbolic_tangent(input_to_c_1_lstm_1))
h_1_lstm_1 = np.multiply(o_1_lstm_1, hyperbolic_tangent(c_1_lstm_1))
print('h_1_lstm_1: ', h_1_lstm_1.shape, h_1_lstm_1)

z_2_lstm_1 = np.dot(x_2_lstm_1, lstm_1_kernel_0) + np.dot(h_1_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0
print(z_2_lstm_1.shape)
i_2_lstm_1 = sigmoid(z_2_lstm_1[:, 0:3])
f_2_lstm_1 = sigmoid(z_2_lstm_1[:, 3:6])
input_to_c_2_lstm_1 = z_2_lstm_1[:, 6:9]
o_2_lstm_1 = sigmoid(z_2_lstm_1[:, 9:12])
c_2_lstm_1 = np.multiply(f_2_lstm_1, c_1_lstm_1) + np.multiply(i_2_lstm_1, hyperbolic_tangent(input_to_c_2_lstm_1))
h_2_lstm_1 = np.multiply(o_2_lstm_1, hyperbolic_tangent(c_2_lstm_1))
print('h_2_lstm_1: ', h_2_lstm_1.shape, h_2_lstm_1)

z_3_lstm_1 = np.dot(x_3_lstm_1, lstm_1_kernel_0) + np.dot(h_2_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0
print(z_3_lstm_1.shape)
i_3_lstm_1 = sigmoid(z_3_lstm_1[:, 0:3])
f_3_lstm_1 = sigmoid(z_3_lstm_1[:, 3:6])
input_to_c_3_lstm_1 = z_3_lstm_1[:, 6:9]
o_3_lstm_1 = sigmoid(z_3_lstm_1[:, 9:12])
c_3_lstm_1 = np.multiply(f_3_lstm_1, c_2_lstm_1) + np.multiply(i_3_lstm_1, hyperbolic_tangent(input_to_c_3_lstm_1))
h_3_lstm_1 = np.multiply(o_3_lstm_1, hyperbolic_tangent(c_3_lstm_1))
print('h_3_lstm_1: ', h_3_lstm_1.shape, h_3_lstm_1)

# LSTM 2
x_1_lstm_2 = h_1_lstm_1
x_2_lstm_2 = h_2_lstm_1
x_3_lstm_2 = h_3_lstm_1

c_0_lstm_2 = np.zeros((1, 1))
h_0_lstm_2 = np.zeros((1, 1))
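# LSTM 2 has a single unit, so its states have shape (1, 1)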

z_1_lstm_2 = np.dot(x_1_lstm_2, lstm_2_kernel_0) + np.dot(h_0_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0
print(z_1_lstm_2.shape)
i_1_lstm_2 = sigmoid(z_1_lstm_2[:, 0])
f_1_lstm_2 = sigmoid(z_1_lstm_2[:, 1])
input_to_c_1_lstm_2 = z_1_lstm_2[:, 2]
o_1_lstm_2 = sigmoid(z_1_lstm_2[:, 3])
c_1_lstm_2 = np.multiply(f_1_lstm_2, c_0_lstm_2) + np.multiply(i_1_lstm_2, hyperbolic_tangent(input_to_c_1_lstm_2))
h_1_lstm_2 = np.multiply(o_1_lstm_2, hyperbolic_tangent(c_1_lstm_2))
print('h_1_lstm_2: ', h_1_lstm_2.shape, h_1_lstm_2)

z_2_lstm_2 = np.dot(x_2_lstm_2, lstm_2_kernel_0) + np.dot(h_1_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0
print(z_2_lstm_2.shape)
i_2_lstm_2 = sigmoid(z_2_lstm_2[:, 0])
f_2_lstm_2 = sigmoid(z_2_lstm_2[:, 1])
input_to_c_2_lstm_2 = z_2_lstm_2[:, 2]
o_2_lstm_2 = sigmoid(z_2_lstm_2[:, 3])
c_2_lstm_2 = np.multiply(f_2_lstm_2, c_1_lstm_2) + np.multiply(i_2_lstm_2, hyperbolic_tangent(input_to_c_2_lstm_2))
h_2_lstm_2 = np.multiply(o_2_lstm_2, hyperbolic_tangent(c_2_lstm_2))
print('h_2_lstm_2: ', h_2_lstm_2.shape, h_2_lstm_2)

z_3_lstm_2 = np.dot(x_3_lstm_2, lstm_2_kernel_0) + np.dot(h_2_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0
print(z_3_lstm_2.shape)
i_3_lstm_2 = sigmoid(z_3_lstm_2[:, 0])
f_3_lstm_2 = sigmoid(z_3_lstm_2[:, 1])
input_to_c_3_lstm_2 = z_3_lstm_2[:, 2]
o_3_lstm_2 = sigmoid(z_3_lstm_2[:, 3])
c_3_lstm_2 = np.multiply(f_3_lstm_2, c_2_lstm_2) + np.multiply(i_3_lstm_2, hyperbolic_tangent(input_to_c_3_lstm_2))
h_3_lstm_2 = np.multiply(o_3_lstm_2, hyperbolic_tangent(c_3_lstm_2))
print('h_3_lstm_2: ', h_3_lstm_2.shape, h_3_lstm_2)

output = np.dot(h_3_lstm_2, dense_1_kernel_0) + dense_1_bias_0
print('output: ', output)
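
For reference, each of the six per-timestep blocks above computes the same step function. A minimal sketch of that step factored into a helper (a hypothetical refactoring, not part of the original script, assuming the same i, f, c, o gate ordering used above):

def lstm_step(x, h_tm1, c_tm1, kernel, recurrent_kernel, bias, units):
    # pre-activations of all four gates, concatenated in Keras order: i, f, c, o
    z = np.dot(x, kernel) + np.dot(h_tm1, recurrent_kernel) + bias
    i = sigmoid(z[:, 0:units])           # input gate
    f = sigmoid(z[:, units:2 * units])   # forget gate
    c = f * c_tm1 + i * hyperbolic_tangent(z[:, 2 * units:3 * units])
    o = sigmoid(z[:, 3 * units:])        # output gate
    h = o * hyperbolic_tangent(c)        # hidden state / layer output
    return h, c

With it, the whole LSTM 1 pass over the sequence collapses to:

h, c = np.zeros((1, 3)), np.zeros((1, 3))
for t in time_seq:
    h, c = lstm_step(input_seq[0, t, 0], h, c,
                     lstm_1_kernel_0, lstm_1_recurrent_kernel_0, lstm_1_bias_0, units=3)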

The weights were saved to file at training time and can be retrieved from the following location:

LSTM weights

In order to create the LSTM that fits the sine-wave signal, I used the following code in Keras:

import time

from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.utils import plot_model


def build_simple_model(layers):
    model = Sequential()

    model.add(LSTM(input_shape=(layers[1], layers[0]),
                   output_dim=layers[1],
                   return_sequences=True,
                   activation='tanh',
                   recurrent_activation='sigmoid')) # 'hard_sigmoid'
    # model.add(Dropout(0.2))
    model.add(LSTM(layers[2],
                   return_sequences=False,
                   activation='tanh',
                   recurrent_activation='sigmoid')) # 'hard_sigmoid'

    # model.add(Dropout(0.2))
    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    plot_model(model, to_file='lstm_model.png', show_shapes=True, show_layer_names=True)
    print(model.summary())
    return model
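
Note that output_dim and nb_epoch are legacy Keras 1 argument names, which Keras 2 still accepts with a deprecation warning; under current naming the first layer would read as follows, assuming an otherwise identical configuration:

model.add(LSTM(units=layers[1],
               input_shape=(layers[1], layers[0]),
               return_sequences=True,
               activation='tanh',
               recurrent_activation='sigmoid'))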

This produced the following model:

[model diagram: lstm_model.png, generated by plot_model]

I used the following training procedure:

    seq_len = 3        
    model = lstm.build_simple_model([1, seq_len, 1, 1])

    model.fit(X_train,
              y_train,
              batch_size=512,
              nb_epoch=epochs,
              validation_split=0.05)
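
For completeness, the value the manual pass should reproduce can be read straight off the network; a minimal sketch, assuming model and input_seq as defined above:

predicted = model.predict(input_seq)
print('Keras prediction: ', predicted)  # the manual `output` computed above should match this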

Would it be possible to understand why my forward pass does not produce the desired output when predicting a future sin() signal value from the three previous consecutive ones?

The original example on which I am basing my forward-pass exercise originates here. The weights uploaded in .npy format are from a network that is able to perfectly predict the next sin() value in a series.

Answer

I realised what the problem was. I was trying to extract the model weights using a TensorFlow session (after model fitting) rather than directly via Keras methods. This produced weight matrices that made perfect sense dimension-wise but contained the values from the initialization step.

import tensorflow as tf

model.fit(X_train,
          y_train,
          batch_size=batch_size,
          nb_epoch=epochs,
          validation_split=0.05,
          callbacks=callbacks_list)

print('n_parameters: ', len(model.weights))
# opening a fresh session and running the initializer resets every variable,
# so values read through this session are the initial ones, not the trained ones
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

parameter_names = ['lstm_1_kernel_0',
                   'lstm_1_recurrent_kernel_0',
                   'lstm_1_bias_0',
                   'lstm_2_kernel_0',
                   'lstm_2_recurrent_kernel_0',
                   'lstm_2_bias_0',
                   'dense_1_kernel_0',
                   'dense_1_bias_0']

weights = model.get_weights()
trainable_weights = model.trainable_weights
for parameter in range(len(model.weights)):
    print('')
    # using Keras methods is the correct way
    print('parameter: ', trainable_weights[parameter])
    print('parameter Keras: ', weights[parameter])
    # using a session with TF is the wrong way: it reads the re-initialized values
    print('parameter TF: ', model.weights[parameter].eval(session=sess))
    # np.save(parameter_names[parameter], model.weights[parameter].eval(session=sess))  # wrong: init values
    # np.save(parameter_names[parameter], weights[parameter])  # correct: trained values

This prints the following to screen:

parameter:  <tf.Variable 'lstm_1/kernel:0' shape=(1, 12) dtype=float32_ref>
parameter Keras:  [[ 0.02005039  0.59627813 -0.77670902 -0.17643917  0.64905447 -0.49418128
   0.01204901  0.79791737 -1.58887422 -0.3566488   0.67758918  0.77245694]]
parameter TF:  [[-0.20346385 -0.07166874 -0.58842945  0.03744811  0.46911311 -0.0469712
  -0.07291448  0.27316415 -0.53298378  0.08367682  0.10194337  0.20933461]]

parameter:  <tf.Variable 'lstm_1/recurrent_kernel:0' shape=(3, 12) dtype=float32_ref>
parameter Keras:  [[ 0.01916649 -0.30881727 -0.07018201  0.28770521 -0.45713434 -0.33738521
   0.53091544 -0.78456688  0.50647908  0.12326431 -0.18517831 -0.28752103]
 [ 0.44490865 -0.09020164  1.00983524  0.43070397 -0.14646551 -0.53908533
   1.33833826  0.76106179 -1.28808987  0.71029669 -0.19338571 -0.30499896]
 [ 0.76727188 -0.10291406  0.53285897  0.31021088  0.46876401  0.04961515
   0.0573149   1.17765784 -0.45716232  0.26181531  0.60458028 -0.6042906 ]]
parameter TF:  [[-0.044281   -0.42013288 -0.06702472  0.16710882  0.07229936  0.20263752
   0.01935999 -0.65925431  0.21676332  0.02481769  0.50321299 -0.08369029]
 [-0.17725646 -0.14031938 -0.07758044 -0.39292315  0.36675838 -0.20198873
   0.59491426 -0.12469263  0.14705807  0.39603388 -0.25511321 -0.01221756]
 [ 0.51603764  0.34401873  0.36002275  0.05344227 -0.00293417 -0.36086732
   0.1636388  -0.24916036  0.09064917 -0.04246153  0.05563453 -0.5006755 ]]

parameter:  <tf.Variable 'lstm_1/bias:0' shape=(12,) dtype=float32_ref>
parameter Keras:  [  3.91339064e-01  -2.09703773e-01  -4.88098420e-04   1.15376031e+00
   6.24452651e-01   2.24053934e-01   4.06851530e-01   4.78419960e-01
   1.77846551e-01   3.19107175e-01   5.16630232e-01  -2.22970009e-01]
parameter TF:  [ 0.  0.  0.  1.  1.  1.  0.  0.  0.  0.  0.  0.]

parameter:  <tf.Variable 'lstm_2/kernel:0' shape=(3, 4) dtype=float32_ref>
parameter Keras:  [[ 2.01334882  1.9168334   1.77633524 -0.90856379]
 [ 1.17618477  1.02978265 -0.06435115  0.66180402]
 [-1.33014703 -0.71629387 -0.87376142  1.35648465]]
parameter TF:  [[ 0.83115911  0.72150767  0.51600969 -0.52725452]
 [ 0.53043616  0.59162521 -0.59219611  0.0951736 ]
 [-0.8030411  -0.00424314 -0.06715947  0.67533839]]

parameter:  <tf.Variable 'lstm_2/recurrent_kernel:0' shape=(1, 4) dtype=float32_ref>
parameter Keras:  [[-0.09348518 -0.7667768   0.24031806 -0.39155772]]
parameter TF:  [[-0.085137   -0.59010917  0.61000961 -0.52193022]]

parameter:  <tf.Variable 'lstm_2/bias:0' shape=(4,) dtype=float32_ref>
parameter Keras:  [ 1.21466994  2.22224903  1.34946632  0.19186479]
parameter TF:  [ 0.  1.  0.  0.]

parameter:  <tf.Variable 'dense_1/kernel:0' shape=(1, 1) dtype=float32_ref>
parameter Keras:  [[ 2.69569159]]
parameter TF:  [[ 1.5422312]]

parameter:  <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32_ref>
parameter Keras:  [ 0.20767514]
parameter TF:  [ 0.]

The forward-pass code was therefore correct; the weights were wrong. The correct weight .npy files have also been updated at the link mentioned in the question. This forward pass can be used to illustrate sequence generation with an LSTM by recycling the output, as sketched below.
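
A minimal sketch of that recycling idea (hypothetical; forward_pass stands for the manual two-layer pass from the question, wrapped as a function that maps a (1, 3, 1) window to the scalar prediction):

window = input_seq.copy()                 # last observed window, shape (1, 3, 1)
generated = []
for step in range(10):
    y = forward_pass(window)              # hypothetical wrapper around the manual pass
    generated.append(float(y))
    window = np.roll(window, -1, axis=1)  # slide the window one step forward
    window[0, -1, 0] = y                  # recycle the prediction as the newest input
print('generated sequence: ', generated)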

