Incompatible array types are mixed in the forward input (LinearFunction) in machine learning


Problem Description

I have trained a deep Q-learning model using Chainer:

import copy
import time

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import serializers

class Q_Network(chainer.Chain):

    def __init__(self, input_size, hidden_size, output_size):
        super(Q_Network, self).__init__(
            fc1=L.Linear(input_size, hidden_size),
            fc2=L.Linear(hidden_size, hidden_size),
            fc3=L.Linear(hidden_size, output_size)
        )

    def __call__(self, x):
        h = F.relu(self.fc1(x))
        h = F.relu(self.fc2(h))
        y = self.fc3(h)
        return y

    def reset(self):
        self.zerograds()

def train_dqn(env):
    Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
    Q_ast = copy.deepcopy(Q)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(Q)

    epoch_num = 50
    step_max = len(env.data) - 1
    memory_size = 200
    batch_size = 20
    # epsilon = 1.0
    epsilon = 0.9
    epsilon_decrease = 1e-3
    epsilon_min = 0.1
    start_reduce_epsilon = 200
    train_freq = 10
    update_q_freq = 20
    # gamma = 0.97
    gamma = 0.9
    show_log_freq = 5

    memory = []
    total_step = 0
    total_rewards = []
    total_losses = []

    start = time.time()
    for epoch in range(epoch_num):

        pobs = env.reset()
        step = 0
        done = False
        total_reward = 0
        total_loss = 0

        while not done and step < step_max:

            # select act (epsilon-greedy)
            pact = np.random.randint(3)
            if np.random.rand() > epsilon:
                pact = Q(np.array(pobs, dtype=np.float32).reshape(1, -1))
                pact = np.argmax(pact.data)

            # act
            obs, reward, done = env.step(pact)

            # add memory
            memory.append((pobs, pact, reward, obs, done))
            if len(memory) > memory_size:
                memory.pop(0)

            # train or update q
            if len(memory) == memory_size:
                if total_step % train_freq == 0:
                    shuffled_memory = np.random.permutation(memory)
                    memory_idx = range(len(shuffled_memory))
                    for i in memory_idx[::batch_size]:
                        batch = np.array(shuffled_memory[i:i + batch_size])
                        b_pobs = np.array(batch[:, 0].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        b_pact = np.array(batch[:, 1].tolist(), dtype=np.int32)
                        b_reward = np.array(batch[:, 2].tolist(), dtype=np.int32)
                        b_obs = np.array(batch[:, 3].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        # np.bool was removed in recent NumPy; plain bool is equivalent
                        b_done = np.array(batch[:, 4].tolist(), dtype=bool)

                        q = Q(b_pobs)
                        maxq = np.max(Q_ast(b_obs).data, axis=1)
                        target = copy.deepcopy(q.data)
                        for j in range(batch_size):
                            target[j, b_pact[j]] = b_reward[j] + gamma * maxq[j] * (not b_done[j])
                        Q.reset()
                        loss = F.mean_squared_error(q, target)
                        total_loss += loss.data
                        loss.backward()
                        optimizer.update()

                if total_step % update_q_freq == 0:
                    Q_ast = copy.deepcopy(Q)

            # epsilon
            if epsilon > epsilon_min and total_step > start_reduce_epsilon:
                epsilon -= epsilon_decrease

            # next step
            total_reward += reward
            pobs = obs
            step += 1
            total_step += 1

        total_rewards.append(total_reward)
        total_losses.append(total_loss)

        if (epoch + 1) % show_log_freq == 0:
            log_reward = sum(total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
            log_loss = sum(total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
            elapsed_time = time.time() - start
            print('\t'.join(map(str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
            start = time.time()

    return Q, total_losses, total_rewards

if __name__ == "__main__":
    Q, total_losses, total_rewards = train_dqn(Environment1(train))
    serializers.save_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', Q)

After saving the model, I load it again and feed data into it to get a prediction:

Load the model:

model = Q_Network(input_size=91, hidden_size=100, output_size=3)
serializers.load_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', model)

Feed in one row of data:

import pandas as pd

data = pd.read_csv(r'C:\Users\willi\Downloads\spyv.csv')

The data looks like this:

open    high     low    close   volume  datetime
0  236.250  239.01  236.22  238.205  2327395     30600
1  238.205  240.47  238.00  239.920  1506096     30660
2  239.955  240.30  238.85  239.700  1357531     30720
3  239.690  243.33  239.66  241.650  1265604     30780
4  241.570  242.13  240.20  240.490   896000     30840

Now predict:

x = data.iloc[1].to_numpy()
y = model(x)

But it raises an error:

IndexError: tuple index out of range

The full error is:

  IndexError                                Traceback (most recent call last)
<ipython-input-7-b745008aa965> in <module>
     64 
     65 x = data.iloc[1].to_numpy()
---> 66 y = Q(x)
     67 
     68 

~\ddqn.ipynb in __call__(self, x)

~\Anaconda3\lib\site-packages\chainer\link.py in __call__(self, *args, **kwargs)
    285             # forward is implemented in the child classes
    286             forward = self.forward  # type: ignore
--> 287         out = forward(*args, **kwargs)
    288 
    289         # Call forward_postprocess hook

~\Anaconda3\lib\site-packages\chainer\links\connection\linear.py in forward(self, x, n_batch_axes)
    181             in_size = utils.size_of_shape(x.shape[n_batch_axes:])
    182             self._initialize_params(in_size)
--> 183         return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)

~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in linear(x, W, b, n_batch_axes)
    306         args = x, W, b
    307 
--> 308     y, = LinearFunction().apply(args)
    309     if n_batch_axes > 1:
    310         y = y.reshape(batch_shape + (-1,))

~\Anaconda3\lib\site-packages\chainer\function_node.py in apply(self, inputs)
    305 
    306         if configuration.config.type_check:
--> 307             self._check_data_type_forward(in_data)
    308 
    309         self.check_layout_forward(input_vars)

~\Anaconda3\lib\site-packages\chainer\function_node.py in _check_data_type_forward(self, in_data)
    444         try:
    445             with type_check.light_mode:
--> 446                 self.check_type_forward(in_type)
    447             return
    448         except type_check.InvalidType:

~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in check_type_forward(self, in_types)
     27             x_type.ndim == 2,
     28             w_type.ndim == 2,
---> 29             x_type.shape[1] == w_type.shape[1],
     30         )
     31         if type_check.eval(n_in) == 3:

IndexError: tuple index out of range

Recommended Answer

TypeError: incompatible array types are mixed in the forward input (LinearFunction).
Actual: <class 'pandas.core.frame.DataFrame'>, <class 'numpy.ndarray'>, <class 'numpy.ndarray'>

The error says that your input is a pandas.core.frame.DataFrame while your model parameters are numpy.ndarray, so Chainer's type check rejects the mixed input.

You need to convert your pandas dataframe data to numpy using .to_numpy(). You will probably also face other issues with the format of the data, and you will need to manipulate it to match your training examples.

x = data.iloc[1].to_numpy()
y = model(x)
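
Note that data.iloc[1].to_numpy() by itself still returns a 1-D float64 array, which is exactly what triggers the IndexError in the traceback above: the Linear link's type check reads x.shape[1], and a 1-D shape tuple has no index 1. Below is a minimal sketch of a conversion that satisfies the type check; it assumes the row really carries the 91 features the network was trained with (input_size=91), so with the raw 6-column CSV shown above you would first have to build the observation the same way Environment1 does during training:

import numpy as np
import pandas as pd

data = pd.read_csv(r'C:\Users\willi\Downloads\spyv.csv')

# Chainer's Linear link expects a 2-D float32 array of shape
# (batch_size, input_size), so cast the row and add a batch axis.
x = data.iloc[1].to_numpy().astype(np.float32).reshape(1, -1)

y = model(x)                      # forward pass through the loaded Q-network
action = int(np.argmax(y.data))   # greedy action index, as in training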
