add readme, chapter 03 finished
This commit is contained in:
83
03_autograd.py
Normal file
83
03_autograd.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import torch
|
||||
|
||||
# --- Autograd basics --------------------------------------------------------
# requires_grad=True makes x a leaf tensor whose operations are recorded, so
# gradients with respect to x can be computed later.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(x)

# y is a non-leaf tensor: the addition attaches a grad_fn that backprop uses
# to propagate gradients back to x.
y = x + 2
# y.retain_grad()  # uncomment to keep the gradient of y (a non-leaf tensor)
print(y)

# Scalar result: z = mean(y), so dz/dx_i = 1/3 for every element of x.
z = y.mean()
print(z)

# No argument is needed because z is a scalar; autograd computes the exact
# gradient dz/dx (this is not a numerical approximation).
z.backward()
# print(y.grad)  # only populated if y.retain_grad() was called above
print(x.grad)

# Non-scalar result: z now has one entry per element of x.
z = y * y * 2
print(z)

# z.backward() without an argument fails here because z is not a scalar.
# For a non-scalar output, backward() needs a vector v with the same shape as
# z and computes the vector-Jacobian product v^T * J via the chain rule.
# v weights each element of z (it is the gradient flowing in from whatever
# comes after z); it is NOT a step size, and the result is exact.
# With the tiny values used here the resulting gradient is simply scaled by 1e-9.
v = torch.tensor([0.000000001, 0.000000001, 0.000000001], dtype=torch.float32)

# .grad accumulates across backward() calls, so reset it first; otherwise the
# values printed below would be the sum of both backward passes.
x.grad.zero_()
z.backward(v)  # pass the vector for the vector-Jacobian product
print(x.grad)
|
||||
|
||||
|
||||
# --- Preventing operations from being tracked by autograd -------------------
# Three options:
#   1. x.requires_grad_(False)  -> turns off tracking on x itself (in place)
#   2. x.detach()               -> returns a new tensor that does not require grad
#   3. with torch.no_grad():    -> temporarily disables tracking inside the block

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x * x
print(x)

# Option 1: the trailing underscore marks an in-place operation on x.
x.requires_grad_(False)
print(x)

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x * x
print(x)

# Option 2: z holds the same values as x but is cut off from the graph;
# x itself still requires grad.
z = x.detach()
print(z)

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x * x
print(x)

# Option 3: operations inside the context manager are not recorded, so a has
# no grad_fn and does not require grad.
with torch.no_grad():
    a = x + 2
    print(a)

# Outside the context manager the same operation is tracked again.
b = x + 2
print(b)
|
||||
|
||||
|
||||
# --- Gradient accumulation --------------------------------------------------
# backward() ADDS to .grad instead of overwriting it, so the gradients must be
# cleared before the next backward pass.

# Dummy training loop.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()  # clear gradients; without this they grow each epoch

# Later: the same pattern with a built-in optimizer.
# The optimizer expects an iterable of parameters, so the single tensor is
# wrapped in a list (passing the bare tensor raises a TypeError).
optimizer = torch.optim.SGD([weights], lr=0.01)  # stochastic gradient descent
# The gradients were zeroed in the loop above, so this step leaves weights
# unchanged; in real training step() is called right after backward().
optimizer.step()
optimizer.zero_grad()  # clear gradients
|
||||
|
||||
# RECAP
|
||||
# turn on gradient tracking for interesting vectors (f(x) = x², f'(x) = ? -> requires_grad=True for x)
|
||||
# calculate gradients with f.backward(); for a non-scalar f, pass a vector of the same shape as f for the vector-Jacobian product (not needed for scalar results like mean())
|
||||
# clear gradients with x.grad.zero_()
|
||||
# prevent operations from being tracked in the comp graph with one of the 3 options above
|
||||
Reference in New Issue
Block a user