add readme, chapter 03 finished

2022-10-16 23:10:06 +02:00
parent 13a8845ab3
commit b5ab293a9b
5 changed files with 253 additions and 1 deletions
--- a/03_autograd.py
+++ b/03_autograd.py
@@ -0,0 +1,83 @@
+import torch
+
+x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)  # leaf tensor; autograd records operations on it
+print(x)
+
+y = x + 2  # y gets a grad_fn (AddBackward0) that backward() later uses to propagate gradients
+# y.retain_grad() # for getting grad of y (a non-leaf tensor)
+print(y)
+
+z = y * y * 2
+z = z.mean()  # reduce the previous z (not y) to a scalar, so the y*y*2 step is actually used
+print(z)
+
+z.backward()  # no argument needed because z is scalar; autograd computes the exact gradient dz/dx
+# print(y.grad)
+print(x.grad)  # dz/dx = 4 * (x + 2) / 3
+
+z = y*y*2
+print(z)
+# z.backward() without an argument fails because z is not scalar -> pass a vector for the vector-Jacobian product (VJP)
+# v must have the same shape as z; backward(v) computes v^T * J via the chain rule, i.e. the gradient of sum(v * z) w.r.t. x
+# v weights the elements of z (it is the upstream gradient), not a step size: autograd is exact, not a finite-difference approximation
+v = torch.tensor([0.000000001, 0.000000001, 0.000000001], dtype=torch.float32)
+z.backward(v) # pass vector to the VJP; the result is added to whatever is already stored in x.grad
+print(x.grad)
+
+
+# prevent operation from being tracked by gradient tracking (requires_grad)
+# 3 options
+# 1. x.requires_grad_(False) -> turn off requires_grad completely
+# 2. x.detach() -> returns new tensor without requires_grad
+# 3. with torch.no_grad(): -> lets you do operations without grad tracking temporarily
+
+x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
+y = x * x
+print(x)  # still tracked at this point
+
+# Option 1: flip the flag in place on the tensor itself
+x.requires_grad_(False)
+print(x)
+
+x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
+y = x * x
+print(x)  # fresh tracked tensor for the next option
+
+# Option 2: take a detached copy; x itself stays tracked
+detached = x.detach()
+print(detached)
+
+x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
+y = x * x
+print(x)  # fresh tracked tensor for the next option
+
+# Option 3: suspend tracking only inside the context manager
+with torch.no_grad():
+    untracked = x + 2
+    print(untracked)
+
+tracked = x + 2  # outside the context the operation is recorded again
+print(tracked)
+
+
+# gradients accumulate (are summed) in .grad across backward() calls -> reset them to zero before the next backward pass
+
+# Dummy training loop: shows why the gradients have to be cleared after every backward pass.
+weights = torch.ones(4, requires_grad=True)
+for epoch in range(3):
+    model_output = (weights * 3).sum()
+    model_output.backward()
+    print(weights.grad)  # the same values every epoch only because of the reset below
+    weights.grad.zero_()  # clear gradients in place; otherwise they would add up across epochs
+
+
+# Later: the same reset done through an optimizer, which expects an iterable of parameters (hence the list).
+optimizer = torch.optim.SGD([weights], lr=0.01)  # stochastic gradient descent
+optimizer.step()  # update the parameters using their current .grad
+optimizer.zero_grad()  # clear gradients of every parameter handed to the optimizer
+
+# RECAP
+# turn on gradient tracking for interesting vectors (f(x) = x², f'(x) = ? -> requires_grad=True for x)
+# calculate gradient with f.backward(); for non-scalar outputs pass a gradient vector of the same shape (not needed for scalar results like mean())
+# clear gradients with x.grad.zero_()
+# prevent operations from being tracked in the comp graph with one of the 3 options above