How do I get the output dimensions of each layer of a neural network in PyTorch?
You can use torchsummary; for instance, for an ImageNet-sized input (3x224x224):
from torchvision import models
from torchsummary import summary
# Build a VGG-16 and print a per-layer table of output shapes and parameter
# counts. The shape passed to summary() is (channels, height, width); the
# batch dimension is left out and shows up as -1 in the printed table.
vgg = models.vgg16()
summary(vgg, (3, 224, 224))
Layer (type) Output Shape Param #
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
Linear-32 [-1, 4096] 102,764,544
ReLU-33 [-1, 4096] 0
Dropout-34 [-1, 4096] 0
Linear-35 [-1, 4096] 16,781,312
ReLU-36 [-1, 4096] 0
Dropout-37 [-1, 4096] 0
Linear-38 [-1, 1000] 4,097,000
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
Input size (MB): 0.57
Forward/backward pass size (MB): 218.59
Params size (MB): 527.79
Estimated Total Size (MB): 746.96
Source: model-summary-in-pytorch
Like David Ng's answer but a tad shorter:
def get_output_shape(model, image_dim):
    """Return the shape ``model`` produces for an input of shape ``image_dim``.

    A random tensor of shape ``image_dim`` is passed through ``model`` once and
    the shape of the result is returned. The model's train/eval mode is left
    untouched.

    Args:
        model: A callable that maps a tensor to a tensor, e.g. an ``nn.Module``.
        image_dim: Full input shape as a sequence of ints (a tuple or a
            ``torch.Size``), including any batch dimension the model expects.

    Returns:
        The ``torch.Size`` of the model's output.
    """
    # Only the shape is needed, so skip autograd bookkeeping for the probe
    # pass instead of building a graph and detaching it afterwards via .data.
    with torch.no_grad():
        return model(torch.rand(*image_dim)).shape
In this example I needed to figure out the input of the last Linear layer:
class Net(nn.Module):
    """Two-block CNN whose linear layer is sized by probing the conv stack.

    Instead of hard-coding the number of input features of ``fc1``, the
    constructor pushes a dummy input of ``expected_input_shape`` through each
    conv/pool pair with ``get_output_shape`` and derives the size from the
    result, so changing an earlier layer does not require recomputing it by
    hand.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Shape of the inputs forward() is expected to receive; used only to
        # size fc1 below.
        self.expected_input_shape = (1, 1, 192, 168)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        # dropout2 is not used in forward(); kept so the module's attributes
        # stay as they were.
        self.dropout2 = nn.Dropout2d(0.5)
        self.maxpool1 = nn.MaxPool2d(2)
        self.maxpool2 = nn.MaxPool2d(3)

        # Calculate the input of the Linear layer. Only the conv and pooling
        # layers are probed; the ReLU and dropout steps in forward() are left
        # out of the probe.
        conv1_out = get_output_shape(
            self.maxpool1,
            get_output_shape(self.conv1, self.expected_input_shape),
        )
        conv2_out = get_output_shape(
            self.maxpool2,
            get_output_shape(self.conv2, conv1_out),
        )
        # Flatten: forward() keeps dim 0 (the batch) and collapses the rest,
        # so the feature count is the product of every dim after the first.
        fc1_in = 1
        for dim in conv2_out[1:]:
            fc1_in *= dim
        self.fc1 = nn.Linear(fc1_in, 38)

    def forward(self, x):
        """Return log-softmax scores over dim 1 for the input batch ``x``."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool2(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # flatten to a single dimension
        x = self.fc1(x)
        output = F.log_softmax(x, dim=1)
        return output
This way, if I make changes to previous layers, I won't have to calculate all over again!
My answer is based on this answer
A simple way is:
- Pass the input to the model.
- Print the size of the output after passing every layer.
class Model(nn.Module):
    """Toy network whose forward pass prints the output size of every layer.

    The layers live in an ``nn.Sequential`` so that ``forward`` can iterate
    over them one at a time and report the intermediate tensor size after
    each step.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            # nn.Conv2d has no default kernel_size, so one must be given.
            # A 3x3 kernel without padding trims 2 from height and width.
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3),
            # Collapse (C, H, W) into one feature axis for the linear layers.
            nn.Flatten(),
            # Requires exactly 4096 features: 16 * (H - 4) * (W - 4) == 4096.
            nn.Linear(4096, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run ``x`` through each layer in order, printing each output size."""
        for layer in self.net:
            x = layer(x)
            print(x.size())
        return x


model = Model()
# A 20x20 image leaves 16 channels of 16x16 after the two convolutions,
# i.e. the 4096 features that the first nn.Linear expects.
x = torch.randn(1, 3, 20, 20)

# Let's print it
model(x)
But be careful with the input size, because you are using nn.Linear
in your net: it raises a shape-mismatch error if the size of the input reaching nn.Linear is not 4096.