正确形状的keras LSTM进料输入 [英] keras LSTM feeding input with the right shape

查看:125
本文介绍了正确形状的keras LSTM进料输入的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在从具有以下形状的熊猫数据框中获取一些数据

I am getting some data from a pandas dataframe with the following shape

df.head()
>>>
Value USD   Drop 7  Up 7    Mean Change 7   Change      Predict
0.06480     2.0     4.0     -0.000429       -0.00420    4
0.06900     1.0     5.0     0.000274        0.00403     2
0.06497     1.0     5.0     0.000229        0.00007     2
0.06490     1.0     5.0     0.000514        0.00200     2
0.06290     2.0     4.0     0.000229        -0.00050    3

前5列旨在用作X并预测y.这就是我预处理模型数据的方式

The first 5 columns are intended to be the X and predict the y. This is how I preprocess the data for the model

from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from keras.layers import LSTM
from sklearn import preprocessing

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    """Split a DataFrame into float32 feature and target arrays.

    Every column other than ``target`` becomes a feature column.

    Args:
        df: pandas DataFrame holding the features and the target column.
        target: name of the column to predict.

    Returns:
        A ``(X, y)`` tuple of 2-D ``float32`` NumPy arrays. If the target
        column has dtype int64 or int32 it is treated as a classification
        label and ``y`` is its one-hot encoding (one column per distinct
        value, as produced by ``pd.get_dummies``). Otherwise ``y`` is the
        target column itself with shape ``(n_rows, 1)``.
    """
    feature_columns = [column for column in df.columns if column != target]

    # df[target] is a DataFrame (and .dtypes a Series) when the column name
    # is duplicated; in that case use the dtype of the first such column.
    target_type = df[target].dtypes
    if isinstance(target_type, pd.Series):
        target_type = target_type.iloc[0]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on both old and new pandas versions.
    features = df[feature_columns].values.astype(np.float32)

    if target_type in (np.int64, np.int32):
        # Classification: one-hot encode the integer labels.
        dummies = pd.get_dummies(df[target])
        return features, dummies.values.astype(np.float32)

    # Regression: keep the target as a single-column 2-D array.
    return features, df[[target]].values.astype(np.float32)

# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):
    """Replace column ``name`` of ``df`` in place with integer label codes.

    A fresh ``LabelEncoder`` is fitted on the column, the column is
    overwritten with the encoded values, and the encoder's ``classes_``
    attribute is returned.
    """
    encoder = preprocessing.LabelEncoder()
    encoded_column = encoder.fit_transform(df[name])
    df[name] = encoded_column
    return encoder.classes_

df['Predict'].value_counts()
>>>
4    1194
3     664
2     623
0     405
1      14
Name: Predict, dtype: int64

predictions = encode_text_index(df, "Predict")
predictions
>>>
array([0, 1, 2, 3, 4], dtype=int64)

X,y = to_xy(df,"Predict")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=False)

X_train
>>>
array([[ 6.4800002e-02,  2.0000000e+00,  4.0000000e+00, -4.2857142e-04,
        -4.1999999e-03],
       [ 6.8999998e-02,  1.0000000e+00,  5.0000000e+00,  2.7414286e-04,
         4.0300000e-03],
       [ 6.4970002e-02,  1.0000000e+00,  5.0000000e+00,  2.2857143e-04,
         7.0000002e-05],
       ...,
       [ 9.5987000e+02,  5.0000000e+00,  2.0000000e+00, -1.5831429e+01,
        -3.7849998e+01],
       [ 9.9771997e+02,  5.0000000e+00,  2.0000000e+00, -1.6948572e+01,
        -1.8250000e+01],
       [ 1.0159700e+03,  5.0000000e+00,  2.0000000e+00, -1.3252857e+01,
        -7.1700001e+00]], dtype=float32)

y_train
>>>
array([[0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.]], dtype=float32)

X_train[1]
>>>
array([6.8999998e-02, 1.0000000e+00, 5.0000000e+00, 2.7414286e-04,
       4.0300000e-03], dtype=float32)

X_train.shape
>>>
(2320, 5)

X_train[1].shape
>>>
(5,)

最后是LSTM模型(看起来也不是编写该模型的最佳方法,因此,如果是这样的话,还应该对内部层进行重写)

and finally the LSTM model (also it might look like not the best way to write one so will appreciate a rewrite of the inner layers as well if that's the case)

model = Sequential()
#model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, 1)))
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
model.add(LSTM(50, dropout=0.2, return_sequences=True))
#model.add(Dense(50, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.fit(X_train, y_train, epochs=1000)

model.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=15, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model

model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[monitor,checkpointer], verbose=2, epochs=1000)
model.load_weights('best_weights.hdf5') # load weights from best model

运行此命令将引发此错误

running this throws this error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-67-a17835a382f6> in <module>()
     15 checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model
     16 
---> 17 model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[monitor,checkpointer], verbose=2, epochs=1000)
     18 model.load_weights('best_weights.hdf5') # load weights from best model

c:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
    948             sample_weight=sample_weight,
    949             class_weight=class_weight,
--> 950             batch_size=batch_size)
    951         # Prepare validation data.
    952         do_validation = False

c:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
    747             feed_input_shapes,
    748             check_batch_axis=False,  # Don't enforce the batch size.
--> 749             exception_prefix='input')
    750 
    751         if y is not None:

c:\users\samuel\appdata\local\programs\python\python35\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    125                         ': expected ' + names[i] + ' to have ' +
    126                         str(len(shape)) + ' dimensions, but got array '
--> 127                         'with shape ' + str(data_shape))
    128                 if not check_batch_axis:
    129                     data_shape = data_shape[1:]

ValueError: Error when checking input: expected lstm_48_input to have 3 dimensions, but got array with shape (2320, 5)

我已经尝试了许多X_train输入形状的变化,但是每个形状都会引发一些错误,我还检查了 Keras文档,但尚不清楚应如何将数据馈入模型

I've tried a lot of variations of the X_train input shape but every single one throws some error, I also checked the Keras docs but it wasn't clear on how the data should be fed to the model

首先是重塑X_train

First is reshaping X_train

data = np.resize(X_train,(X_train.shape[0],1,X_train.shape[1]))
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=data.shape))

这失败并显示错误

ValueError: Input 0 is incompatible with layer lstm_52: expected ndim=3, found ndim=4 

建议我以

model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=X_train.shape[1:]))

抛出相同错误的

that throws the same error

ValueError: Input 0 is incompatible with layer lstm_63: expected ndim=3, found ndim=2

建议2

使用熊猫的默认X,y

Suggestion 2

use the default X,y from pandas

y = df['Predict']
X = df[['Value USD', 'Drop 7', 'Up 7', 'Mean Change 7', 'Change']]

X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=False)

LSTM还希望通过以下方式输入(batch_size, timesteps, input_dim)

also that LSTM expect input in the following way (batch_size, timesteps, input_dim)

所以我尝试了这个

model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=(100, 100, X_train.shape)))

会引发此错误

TypeError: Error converting shape to a TensorShape: int() argument must be a string, a bytes-like object or a number, not 'tuple'.

和另一种方式

model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape=(100, 100, X_train[1].shape)))

返回相同的错误

TypeError: Error converting shape to a TensorShape: int() argument must be a string, a bytes-like object or a number, not 'tuple'.

推荐答案

您想要建立一个具有多个特征的LSTM(有状态还是无状态?),这些特征是数据框中的 Value USD、Drop 7、Up 7、Mean Change 7、Change 各列.类似的问题见 https://github.com/keras-team/keras/issues/6471

You want to set up a LSTM ( stateful or stateless ? ) with multiple features, the features are the columns Value USD Drop 7 Up 7 Mean Change 7 Change in your dataframe. A similar problem is in https://github.com/keras-team/keras/issues/6471

Keras LSTM将输入作为(batch_size (number of samples processed at a time),timesteps,features) = (batch_size, timesteps, input_dim),因为您有5个特征,所以input_dim = features = 5.我不了解您的全部数据,所以无法说得更多. number_of_samples(数据框中的行数)和batch_size的关系见 http://philipperemy.github.io/keras-stateful-lstm/ ,batch_size是一次处理的样本(行)数(参见关于RNN中批大小和时间步长的疑问):

Keras LSTMs accept input as (batch_size (number of samples processed at a time),timesteps,features) = (batch_size, timesteps, input_dim) As you have 5 features input_dim = features = 5. i do not know your entire data so i can not say more. The relation of number_of_samples ( number of rows in your dataframe ) and batch_size is in http://philipperemy.github.io/keras-stateful-lstm/, batch_size is the number of samples ( rows ) processed at a time ( doubts regarding batch size and time steps in RNN ) :

以不同的方式讲,每当您训练或测试LSTM时,您首先需要 建立形状为nb_samples, timesteps, input_dim的输入矩阵X 您的batch size除以nb_samples的位置.例如,如果 nb_samples=1024batch_size=64,这意味着您的模型将 接收64个样本的块,计算每个输出(无论数量多少 时间步长是针对每个样本的),平均梯度并传播 更新参数向量.

Said differently, whenever you train or test your LSTM, you first have to build your input matrix X of shape nb_samples, timesteps, input_dim where your batch size divides nb_samples. For instance, if nb_samples=1024 and batch_size=64, it means that your model will receive blocks of 64 samples, compute each output (whatever the number of timesteps is for every sample), average the gradients and propagate it to update the parameters vector.

source: http://philipperemy.github.io/keras-stateful-lstm/

批次大小对于培训很重要

batch size is important for training

批处理大小为1表示将使用 online拟合模型 培训(与批量培训小批量培训相对).作为一个 结果,预计模型拟合将有一定的差异.

A batch size of 1 means that the model will be fit using online training (as opposed to batch training or mini-batch training). As a result, it is expected that the model fit will have some variance.

源: https://machinelearningmastery.com/stateful-stateless-lstm-time-series-forecasting-python/

timesteps是要回顾的时间步数/过去的网络状态,由于性能原因,LSTM的最大值约为200-500(消失梯度问题),最大值约为200( https://github.com/keras-team/keras/issues/2057 )

timesteps is the number of timesteps / past network states you want to look back on, there is a maximal value for LSTMs of about 200-500 ( Vanishing Gradient problem ) for performance reason maximal value is about 200 ( https://github.com/keras-team/keras/issues/2057 )

拆分更容易(在pandas数据框中选择多个列 ):

y = df['Predict']
X = df[['Value USD','Drop 7','Up 7','Mean Change 7', 'Change']]

https://www.kaggle.com/mknorps/titanic-with-decision-trees 中有用于修改数据类型的代码

in https://www.kaggle.com/mknorps/titanic-with-decision-trees is code for modifying data types

已更新:

要摆脱这些错误,您必须重塑训练数据,如

to get rid of these errors you have to reshape the training data like in Error when checking model input: expected lstm_1_input to have 3 dimensions, but got array with shape (339732, 29) ( also contains reshaping code for more than 1 timestep ). i post entire code that worked for me because this question is less trivial than it appeared on first sight ( note the number of [ and ] that indicate the dimension of an array, when reshaping ) :

import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.layers import LSTM
from sklearn import preprocessing

# Load the data; '/path/data_lstm.dat' is a placeholder for the real file.
df = pd.read_csv('/path/data_lstm.dat')

y = df['Predict']
X = df[['Value USD', 'Drop 7', 'Up 7', 'Mean Change 7', 'Change']]

# shuffle=False keeps the rows in their original order for the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=False)

# DataFrame -> NumPy array, see
# https://stackoverflow.com/questions/13187778/convert-pandas-dataframe-to-numpy-array-preserving-index
X_train_array = X_train.values
# -1 lets NumPy infer the row count instead of hard-coding it to 4.
y_train_array = y_train.values.reshape(-1, 1)

# NOTE: the test arrays are left 2-D / 1-D here; they need the same
# reshaping as the training arrays before being used as validation data.
X_test_array = X_test.values
y_test_array = y_test.values


# Reshape to (number_of_samples, timesteps, number_of_features) = (n, 1, 5),
# the 3-D layout the LSTM layer expects. The sample outputs below were
# produced with the first four rows of the data.

X_train_array = np.reshape(X_train_array, (X_train_array.shape[0], 1, X_train_array.shape[1]))
#>>> X_train_array    NOTE THE NUMBER OF [ and ] !!
#array([[[ 6.480e-02,  2.000e+00,  4.000e+00, -4.290e-04, -4.200e-03]],

#       [[ 6.900e-02,  1.000e+00,  5.000e+00,  2.740e-04,  4.030e-03]],

#       [[ 6.497e-02,  1.000e+00,  5.000e+00,  2.290e-04,  7.000e-05]],

#       [[ 6.490e-02,  1.000e+00,  5.000e+00,  5.140e-04,  2.000e-03]]])
y_train_array = np.reshape(y_train_array, (y_train_array.shape[0], 1, y_train_array.shape[1]))
#>>> y_train_array     NOTE THE NUMBER OF [ and ]   !!
#array([[[4]],

#       [[2]],

#       [[2]],

#       [[2]]])


n_timesteps = X_train_array.shape[1]
n_features = X_train_array.shape[2]
# Assumes 'Predict' holds integer class labels 0..K-1 (TODO confirm against
# the data); sparse_categorical_crossentropy needs one output unit per label.
n_classes = int(y.max()) + 1

model = Sequential()
# input_shape omits the batch dimension, so any batch size can be used.
model.add(LSTM(32, return_sequences=True, input_shape=(n_timesteps, n_features)))
model.add(LSTM(32, return_sequences=True))
# A softmax over a single unit always outputs 1.0, so emit one unit per class.
model.add(Dense(n_classes, activation='softmax'))
# Integer (not one-hot) labels with several classes -> sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

这篇关于正确形状的keras LSTM进料输入的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆