卷积编码器错误-'RuntimeError:输入形状和目标形状不匹配' [英] Convolutional encoder error - 'RuntimeError: input and target shapes do not match'

查看:418
本文介绍了卷积编码器错误-'RuntimeError:输入形状和目标形状不匹配'的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

在下面的代码中,创建,保存了三个图像,并且卷积自动编码器尝试将它们编码为较低维的表示形式.

In the code below, three images are created and saved, and a convolutional auto-encoder attempts to encode them into a lower-dimensional representation.

%reset -f

import torch.utils.data as data_utils
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics
import datetime
from sklearn.preprocessing import MultiLabelBinarizer
import seaborn as sns
sns.set_style("darkgrid")
from ast import literal_eval
import numpy as np
from sklearn.preprocessing import scale
import seaborn as sns
sns.set_style("darkgrid")
import torch
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
from os import listdir
import cv2
import torch.nn.functional as F
import numpy as np
from numpy.polynomial.polynomial import polyfit
import matplotlib.pyplot as plt


number_channels = 3

%matplotlib inline

x = np.arange(10)
m = 1
b = 2
y = x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('1-increasing.jpg')

x = np.arange(10)
m = 0.01
b = 2
y = x * x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('2-increasing.jpg')

x = np.arange(10)
m = 0
b = 2
y = (m*x)+b
plt.plot(x, y)
plt.axis('off')
plt.savefig('constant.jpg')

batch_size_value = 2

train_image = []

train_image.append(cv2.imread('1-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('2-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('decreasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('constant.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))


data_loader = data_utils.DataLoader(train_image, batch_size=batch_size_value, shuffle=False,drop_last=True)

import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
import os

def to_img(x, channels=1, height=28, width=28):
    """Rescale a batch from the [-1, 1] range to [0, 1] and shape it as images.

    Values are mapped with ``0.5 * (x + 1)`` and then clamped to [0, 1], so
    inputs outside [-1, 1] saturate.

    Args:
        x: tensor whose first dimension is the batch; each sample must hold
            ``channels * height * width`` elements.
        channels: number of image channels in the result (default 1).
        height: image height in the result (default 28).
        width: image width in the result (default 28).

    Returns:
        Tensor of shape ``(batch, channels, height, width)``.
    """
    x = (0.5 * (x + 1)).clamp(0, 1)
    # The defaults reproduce the original fixed (1, 28, 28) layout.
    return x.view(x.size(0), channels, height, width)


# Optimiser step size and number of passes over the data.
learning_rate = 1e-3
num_epochs = 100

# Nominal batch size (alternative value: 128).
batch_size = 2

# The training loop iterates over `dataloader`; reuse the loader built earlier.
dataloader = data_loader

class autoencoder(nn.Module):
    """Convolutional auto-encoder for 3-channel images.

    The shape comments trace a (b, 3, 288, 432) input through the layers.
    The decoder does not restore the input resolution: such an input comes
    out as (b, 3, 132, 204).
    """

    def __init__(self):
        super(autoencoder, self).__init__()
        # Conv2d / MaxPool2d output size: floor((N + 2*padding - kernel) / stride) + 1
        encoder_layers = [
            nn.Conv2d(3, 16, kernel_size=3, stride=3, padding=1),  # b, 16, 96, 144
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # b, 16, 48, 72
            nn.Conv2d(16, 8, kernel_size=3, stride=2, padding=1),  # b, 8, 24, 36
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),  # b, 8, 22, 34
        ]
        # ConvTranspose2d output size: (N - 1) * stride - 2*padding + kernel
        decoder_layers = [
            nn.ConvTranspose2d(8, 16, kernel_size=2, stride=1),  # b, 16, 23, 35
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=3, padding=1),  # b, 8, 67, 103
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=2, stride=2, padding=1),  # b, 3, 132, 204
            nn.Tanh(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode the result; returns the reconstruction."""
        return self.decoder(self.encoder(x))


# Build the model on the GPU in float64 so it matches the double-precision
# inputs produced in the loop below.
model = autoencoder().cuda().double()

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

for epoch in range(num_epochs):
    for data in dataloader:
        # NOTE(review): if `data` is a single batch tensor (as when the
        # DataLoader wraps a plain list of arrays), this unpacking splits it
        # along the batch dimension: it only works for batches of exactly two
        # samples and discards the second one — confirm this is intended.
        img, _ = data
        img = img.double().cuda()
        # Re-add a leading batch dimension of size 1.
        img = img.unsqueeze(0)

        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    # loss is a 0-dim tensor; .item() extracts the Python float
    # (indexing it with [0] raises on current PyTorch).
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.item()))

torch.save(model.state_dict(), './conv_autoencoder.pth')

但是返回错误:

RuntimeError: input and target shapes do not match: input [1 x 3 x 132 x 204], target [1 x 3 x 288 x 432] at /pytorch/aten/src/THCUNN/generic/MSECriterion.cu:15

图像的形状为(3, 288, 432).如何更改模型的配置以允许使用[1 x 3 x 288 x 432]而不是[1 x 3 x 132 x 204]吗?

The shape of each image is (3, 288, 432). How can the configuration of the model be changed so that it outputs [1 x 3 x 288 x 432] instead of [1 x 3 x 132 x 204]?

更新:

我改变了

nn.ConvTranspose2d(8, 3, 2, stride=2, padding=1)

至:

nn.ConvTranspose2d(8, 3, 3, stride=4, padding=2)

这将导致更接近的尺寸输出,但不是精确的,因此现在出现错误:

This results in output dimensions that are closer but still not exact, so the error is now:

RuntimeError: input and target shapes do not match: input [1 x 3 x 263 x 407], target [1 x 3 x 288 x 432] at /pytorch/aten/src/THCUNN/generic/MSECriterion.cu:12

如何计算输出解码器的尺寸以产生正确的尺寸?

How should the output decoder dimensions be calculated in order to produce the correct dimension ?

推荐答案

有两种方法, 这是一种解决方案:

There are a couple of ways; here is one solution:

class autoencoder(nn.Module):
    """Convolutional auto-encoder whose decoder restores a 288x432 input.

    The shape comments trace a (b, 3, 288, 432) input through the layers;
    the extra transposed convolutions at the end of the decoder grow the
    feature map back to (b, 3, 288, 432).
    """

    def __init__(self):
        super(autoencoder, self).__init__()
        # Conv2d / MaxPool2d output size: floor((N + 2*padding - kernel) / stride) + 1
        encoder_layers = [
            nn.Conv2d(3, 16, kernel_size=3, stride=3, padding=1),  # b, 16, 96, 144
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # b, 16, 48, 72
            nn.Conv2d(16, 8, kernel_size=3, stride=2, padding=1),  # b, 8, 24, 36
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1),  # b, 8, 22, 34
        ]
        # ConvTranspose2d output size: (N - 1) * stride - 2*padding + kernel
        decoder_layers = [
            nn.ConvTranspose2d(8, 16, kernel_size=2, stride=1),  # b, 16, 23, 35
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=3, padding=1),  # b, 8, 67, 103
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=2, stride=2, padding=1),  # b, 3, 132, 204
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(3, 3, kernel_size=2, stride=2, padding=1),  # b, 3, 262, 406
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(3, 3, kernel_size=25, stride=1),  # b, 3, 286, 430
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(3, 3, kernel_size=3, stride=1),  # b, 3, 288, 432
            nn.Tanh(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode the result; returns the reconstruction."""
        return self.decoder(self.encoder(x))

这是公式;

N ->输入大小, F ->过滤器大小,跨度->跨度大小, pdg ->填充大小

N --> Input Size, F --> Filter Size, stride-> Stride Size, pdg-> Padding size

ConvTranspose2d;

ConvTranspose2d;

OutputSize = N * stride + F - stride - pdg * 2  [stride = 步幅, pdg = 填充]

Conv2d;

OutputSize = floor((N - F + pdg * 2) / stride) + 1 [整数除法:例如 32/3 = 10,小数部分被舍去]

OutputSize = floor((N - F + pdg*2) / stride) + 1 [integer division: e.g. 32/3 = 10, the fractional part is discarded]

这篇关于卷积编码器错误-'RuntimeError:输入形状和目标形状不匹配'的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆