PyTorch: freeze part of the layers

Build a toy model

import torch.nn as nn
import torch.optim as optim

# A tiny 2-4-3-1 MLP with a sigmoid output.
class Net(nn.Module):

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 4)
        self.relu1 = nn.ReLU()
        # self.dout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(4, 3)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(3, 1)
        self.out_act = nn.Sigmoid()

    def forward(self, inputs):
        a1 = self.fc1(inputs)
        h1 = self.relu1(a1)
        a2 = self.fc2(h1)
        h2 = self.relu2(a2)
        a3 = self.out(h2)
        y = self.out_act(a3)
        return y
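
A quick shape sanity check (a minimal sketch, not part of the model itself): a batch of 2-feature inputs maps to one sigmoid output per sample.

>>> import torch
>>> Net()(torch.randn(5, 2)).shape
torch.Size([5, 1])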

Print the parameters

>>> net = Net()
>>> for para in net.parameters():
... print(para)
...
Parameter containing:
tensor([[-0.1833, -0.3816],
[-0.4710, 0.3696],
[-0.1283, 0.6524],
[ 0.5941, -0.3848]], requires_grad=True)
Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733], requires_grad=True)
Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
Parameter containing:
tensor([0.1250], requires_grad=True)
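
Six parameter tensors in total: a weight and a bias for each of the three Linear layers. As a quick check, that is 31 scalars (fc1: 4×2+4, fc2: 3×4+3, out: 1×3+1):

>>> sum(p.numel() for p in net.parameters())
31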

Set requires_grad to False

>>> for para in net.parameters():
... para.requires_grad = False
... print(para)
...
Parameter containing:
tensor([[-0.1833, -0.3816],
[-0.4710, 0.3696],
[-0.1283, 0.6524],
[ 0.5941, -0.3848]])
Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733])
Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]])
Parameter containing:
tensor([-0.1120, 0.0109, -0.0787])
Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]])
Parameter containing:
tensor([0.1250])
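
With every parameter frozen, a forward pass still works, but its output is detached from the autograd graph, so a backward pass from it would raise an error (a minimal sketch):

>>> net(torch.randn(3, 2)).requires_grad
False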

Set requires_grad back to True

>>> for para in net.parameters():
... para.requires_grad = True
... print(para)
...
Parameter containing:
tensor([[-0.1833, -0.3816],
[-0.4710, 0.3696],
[-0.1283, 0.6524],
[ 0.5941, -0.3848]], requires_grad=True)
Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733], requires_grad=True)
Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
Parameter containing:
tensor([0.1250], requires_grad=True)
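
Instead of looping, nn.Module.requires_grad_ toggles every parameter of a module recursively (it returns the module, hence the throwaway assignment):

>>> _ = net.requires_grad_(True)
>>> all(p.requires_grad for p in net.parameters())
True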

Freeze part of the parameters

For example, freeze only the fc1 layer.

>>> params = net.state_dict()
>>> params.keys()
odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'out.weight', 'out.bias'])
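
Note that state_dict() is only used to list the names here: by default its values are detached from autograd, so setting requires_grad on them would not freeze the live parameters (a quick check):

>>> params['fc1.weight'].requires_grad
False
>>> net.fc1.weight.requires_grad
True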

Set the related layer’s requires_grad to False (a naive way)

>>> keys = list(params.keys())
>>> keys[0]
'fc1.weight'
>>> net.fc1.weight.requires_grad = False
>>> for para in net.parameters():print(para)
...
Parameter containing:
tensor([[-0.1833, -0.3816],
[-0.4710, 0.3696],
[-0.1283, 0.6524],
[ 0.5941, -0.3848]])
Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733], requires_grad=True)
Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
Parameter containing:
tensor([0.1250], requires_grad=True)

A better way

The naive approach above froze only fc1.weight and left fc1.bias trainable. We can identify all of a layer’s parameters by name [1]. First, reset fc1.weight, then freeze everything in fc1:

>>> net.fc1.weight.requires_grad = True
>>> for name, param in net.named_parameters():
... if param.requires_grad:print(name)
...
fc1.weight
fc1.bias
fc2.weight
fc2.bias
out.weight
out.bias
>>> for name, param in net.named_parameters():
... if param.requires_grad and 'fc1' in name:
... param.requires_grad = False
...
>>> for name, param in net.named_parameters():print(name, param)
...
fc1.weight Parameter containing:
tensor([[-0.1833, -0.3816],
[-0.4710, 0.3696],
[-0.1283, 0.6524],
[ 0.5941, -0.3848]])
fc1.bias Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733])
fc2.weight Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
fc2.bias Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
out.weight Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
out.bias Parameter containing:
tensor([0.1250], requires_grad=True)
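
We can verify the freeze with a forward/backward pass (a minimal sketch; the inputs and targets are random): the frozen fc1 parameters receive no gradient, while the others do.

>>> x, target = torch.randn(8, 2), torch.rand(8, 1)
>>> nn.BCELoss()(net(x), target).backward()
>>> net.fc1.weight.grad is None
True
>>> net.fc2.weight.grad is not None
True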

One last step

We are not done yet: even with requires_grad set to False, the weights can still be updated, as a manual in-place update shows:

>>> net.fc1.weight -= 0.1*net.fc1.weight
>>> for name, param in net.named_parameters():print(name, param)
...
fc1.weight Parameter containing:
tensor([[-0.1650, -0.3435],
[-0.4239, 0.3326],
[-0.1154, 0.5872],
[ 0.5347, -0.3463]])
fc1.bias Parameter containing:
tensor([ 0.6866, -0.5329, 0.6027, -0.6733])
fc2.weight Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
fc2.bias Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
out.weight Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
out.bias Parameter containing:
tensor([0.1250], requires_grad=True)
To be safe, construct the optimizer over only the parameters that still require gradients, so it never touches the frozen ones:

>>> optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=0.1)
>>> for p in filter(lambda p: p.requires_grad, net.parameters()): print(p)
...
Parameter containing:
tensor([[-0.0581, -0.1891, -0.4522, -0.1631],
[ 0.4902, 0.0954, -0.2497, -0.4682],
[ 0.0365, -0.0082, -0.4446, -0.0282]], requires_grad=True)
Parameter containing:
tensor([-0.1120, 0.0109, -0.0787], requires_grad=True)
Parameter containing:
tensor([[ 0.3279, 0.1556, -0.1405]], requires_grad=True)
Parameter containing:
tensor([0.1250], requires_grad=True)
>>>

Quick summary

  • Freeze layers by setting their parameters’ requires_grad to False; if we know the target layers, we can filter them by name via named_parameters().
  • Pass only the still-trainable parameters to the optimizer, e.g. filter(lambda p: p.requires_grad, net.parameters()), as in the sketch below.
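
Putting it all together (a minimal sketch; the data, loss, and learning rate are arbitrary): freeze fc1, give the optimizer only the trainable parameters, run one training step, and confirm fc1 is untouched.

>>> net = Net()
>>> before = net.fc1.weight.detach().clone()
>>> for name, p in net.named_parameters():
...     if 'fc1' in name:
...         p.requires_grad = False
...
>>> optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=0.1)
>>> x, target = torch.randn(8, 2), torch.rand(8, 1)
>>> optimizer.zero_grad()
>>> nn.BCELoss()(net(x), target).backward()
>>> optimizer.step()
>>> torch.equal(net.fc1.weight, before)
True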

Reference

[1] https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088
