Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others


0 votes
in Technique[技术] by (71.8m points)

pytorch - Expected more than 1 value per channel when training, got input size torch.Size([1, **])

I get an error when I use BatchNorm1d. Here is my code:

##% first I set a model
class net(nn.Module):
    """Model excerpt: a per-feature linear layer followed by ReLU and batch normalisation.

    NOTE(review): this class is an incomplete excerpt. ``forward`` returns
    ``y_hat``, which is never assigned in the visible code, and the arguments
    ``rnn``, ``output_dim`` and ``num__rnn_layers`` are accepted but not used
    here.
    """

    def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
        """Store the hyper-parameters and build the linear and batch-norm layers.

        Args:
            max_len: number of input features of ``linear1``.
            feature_linear: number of output features of ``linear1`` and the
                feature count handed to ``BN``.
            rnn: not used in the visible code.
            input_size: number of slices along the last axis of ``xb`` that
                ``forward`` iterates over.
            hidden_size: stored on the instance; not otherwise used here.
            output_dim: not used in the visible code.
            num__rnn_layers: not used in the visible code.
            bidirectional: stored; also determines ``num_directions``.
            batch_first: stored on the instance; not otherwise used here.
            p: stored on the instance; presumably a dropout probability
                (TODO confirm, it is not used in the visible code).
        """
        super(net, self).__init__()
        self.max_len = max_len
        self.feature_linear = feature_linear
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        # 2 only when ``bidirectional`` compares equal to True, otherwise 1.
        self.num_directions = 2 if bidirectional == True else 1
        self.p = p
        self.batch_first = batch_first
        self.linear1 = nn.Linear(max_len, feature_linear) 
        init.kaiming_normal_(self.linear1.weight, mode='fan_in')
        # ``BN`` is a module-level name; the script part of this file sets it to nn.BatchNorm1d.
        self.BN1 = BN(feature_linear) 
    def forward(self, xb, seq_len_crt):
        """Apply linear -> ReLU -> batch norm to every slice ``xb[:, :, i]``.

        Args:
            xb: tensor indexed with three axes; presumably
                (batch, max_len, input_size) -- TODO confirm.
            seq_len_crt: not used in the visible code.

        NOTE(review): ``rnn_input`` is allocated but never filled, ``out`` is
        overwritten on every iteration, and ``y_hat`` is undefined in this
        excerpt, so the method raises ``NameError`` as shown.
        """
        rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
        for i in range(self.input_size): 
            out = self.linear1(xb[:, :, i]) # slice i is 2-D; the last axis becomes feature_linear
            out = F.relu(out) # element-wise, shape unchanged
            # In training mode this raises ValueError when the batch axis has size 1
            # (see the traceback quoted later in this file).
            out = self.BN1(out) # shape unchanged
        return y_hat.squeeze(-1)

##% make the model as a function and optimize it
# Hyper-parameters passed to ``net`` below.
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
# NOTE(review): self-assignment; ``batch_size`` must already be defined
# before this line (not shown in this excerpt), otherwise this is a NameError.
batch_size = batch_size
feature_linear = 60
# Normalisation layer class that ``net.__init__`` instantiates as ``BN(feature_linear)``.
BN = nn.BatchNorm1d

# ``max_len`` is not defined in this excerpt; it must come from earlier code.
model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
# reduction='none' keeps the element-wise losses; ``fit`` reduces them itself.
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)

##% use this model to predict data   
def predict(xb, model, seq_len):
    """Run ``model`` on ``xb`` in evaluation mode and return its output.

    A 2-D ``xb`` (a single sample) gets a leading batch axis of size 1, and
    non-tensor input is converted with ``torch.Tensor``.

    If ``model`` is currently in training mode it is switched to eval mode
    for the duration of the call and put back into training mode afterwards.
    Without this, a single-sample prediction made in the middle of training
    fails inside ``BatchNorm1d`` with "Expected more than 1 value per channel
    when training", because batch statistics cannot be estimated from one
    sample. In eval mode the layer uses its running statistics instead.

    Args:
        xb: ndarray or tensor, either 2-D (one sample) or 3-D (a batch).
        model: callable invoked as ``model(xb, seq_len)``. Objects without a
            ``training`` attribute (plain callables) are called as-is.
        seq_len: forwarded unchanged to ``model``.

    Returns:
        Whatever ``model(xb, seq_len)`` returns.
    """
    # xb's shape should be (batch_size, seq_len, n_features)
    if xb.ndim == 2:  # suitable for both ndarray and Tensor
        # add a {batch_size} dim
        xb = xb[None, ]
    if not isinstance(xb, torch.Tensor):
        xb = torch.Tensor(xb)

    # Only toggle the mode when the model is actually training, so a model
    # that is already in eval mode stays in eval mode after the call.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        return model(xb, seq_len)
    finally:
        # Restore training mode even if the forward pass raised.
        if was_training:
            model.train()

##% create training/valid/test data    
# NOTE(review): the body of every ``if`` below is missing from this excerpt,
# so the snippet is not runnable as shown. Each loop walks one of the
# sequence-length collections in steps of ``batch_size``; the ``if`` is true
# only when a full batch of ``batch_size`` items fits before the end.
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
    if i + batch_size <= len(seq_len_train):
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
    if i + batch_size <= len(seq_len_valid):
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
    if i + batch_size <= len(seq_len_test):

##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
    """Iterate over the training batches, compute a length-normalised loss and plot samples.

    NOTE(review): this is an excerpt. No backward pass or optimizer step is
    visible, ``train_loss_record`` is returned without ever being appended to,
    and ``optimizer``, ``valid_dl``, ``valid_ds`` and ``seq_len_valid_iter``
    are not used in the visible code.

    Args:
        epochs: number of passes over ``train_dl``.
        model: called as ``model(xb, seq_len_crt)``.
        loss_func: called as ``loss_func(y_hat, final_yb)``; the result is
            summed over its last axis, so it is expected to be element-wise.
        optimizer: unused in the visible code.
        train_dl: iterable yielding ``(xb, yb)`` pairs.
        valid_dl: unused in the visible code.
        valid_ds: unused in the visible code.
        seq_len_train_iter: iterable zipped with ``train_dl``; yields the
            sequence lengths of each batch.
        seq_len_valid_iter: unused in the visible code.

    Returns:
        ``train_loss_record`` (an empty list in the visible code).
    """
    train_loss_record = []
    valid_loss_record = []
    mean_pct_final = []
    mean_abs_final = []
    is_better = False
    last_epoch_abs_error = 0
    last_epoch_pct_error = 0

    mean_pct_final_train = []
    mean_abs_final_train = []
    for epoch in range(epochs):
        # seq_len_crt: current batch seq len
        for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
            # A lone numpy integer is wrapped in a list so it can be used as a lengths sequence.
            if isinstance(seq_len_crt, np.int64):
                seq_len_crt = [seq_len_crt]
            y_hat = model(xb, seq_len_crt)
            # Pack and immediately unpack the labels using the batch's sequence lengths;
            # ``input_sizes`` holds the per-sequence lengths returned by the unpacking.
            packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
            final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
            # Swap the two axes of the unpacked labels (presumably back to batch-first -- TODO confirm).
            final_yb = final_yb.permute(1, 0)
            # assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
            loss = loss_func(y_hat, final_yb)

            batch_size_crt = final_yb.shape[0]
            # Sum over the last axis, divide by each sequence's length, then average over the batch.
            loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt

            # scheduler.step()
#             print(i)

            with torch.no_grad():
                # Every 50th batch (``epoch % 1 == 0`` is always true): plot the first sample.
                if batches % 50 == 0 and epoch % 1 == 0:
#                     print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')

                    # Single-sample prediction; this is the call that fails in the quoted
                    # traceback because BatchNorm1d receives a batch of size 1 in training mode.
                    y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
                    # Trim the label to the length of the prediction.
                    label = yb[0][:len(y_hat)]
                    # plt.ion()
                    plt.plot(y_hat, label='predicted')
                    plt.plot(label, label='label')
                    plt.legend(loc='upper right')
                    plt.title('training mode')
                    plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
    return train_loss_record

But I get the error "Expected more than 1 value per channel when training, got input size torch.Size([1, 60])". The full error message is:

ValueError                                Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)

<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
     38 #                     print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
---> 40                     y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
     41                     label = yb[0][:len(y_hat)]
     42                     # plt.ion()

<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
      7     if not isinstance(xb, torch.Tensor):
      8         xb = torch.Tensor(xb)
----> 9     return model(xb, seq_len) # xb.shape(None,34,5)

torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
     50             out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
     51             out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52             out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
     54             out = self.linear2(out)

torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

torch\nn\modules\batchnorm.py in forward(self, input)
    129         used for normalization (i.e. in eval mode when buffers are not None).
    130         """
--> 131         return F.batch_norm(
    132             input,
    133             # If buffers are not to be tracked, ensure that they won't be updated

torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2052                 bias=bias, training=training, momentum=momentum, eps=eps)
   2053     if training:
-> 2054         _verify_batch_size(input.size())
   2056     return torch.batch_norm(

torch\nn\functional.py in _verify_batch_size(size)
   2035         size_prods *= size[i + 2]
   2036     if size_prods == 1:
-> 2037         raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])

I checked and found that at `out = self.BN1(out)`, `out.shape` is (1, 60). It seems that batch_size=1 is not permitted in BatchNorm1d, but I don't know how to fix it.

question from:https://stackoverflow.com/questions/65882526/expected-more-than-1-value-per-channel-when-training-got-input-size-torch-size

Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)

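What does BatchNorm1d do mathematically?
Try writing down the equation for the case of batch_size=1 and you'll understand why PyTorch is angry with you: with a single sample, the batch mean equals the sample itself and the batch variance is zero, so the normalized output carries no information about the input.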

How to solve it?
It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.

When evaluating your model, call model.eval() before and model.train() after.

Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question
