# Activation functions apply a non-linear transform to a layer's output.
# Without them, the whole network would collapse into a single linear model,
# which is not suited for complex tasks.

# Step
# f(x) = 1 if x >= threshold else 0

# Sigmoid
# f(x) = 1 / (1 + exp(-x))
# output between 0 and 1; typically the last layer in binary classification

# Tanh
# f(x) = 2 / (1 + exp(-2x)) - 1
# output between -1 and 1; common choice for hidden layers

# ReLU
# f(x) = max(0, x)
# if you don't know what to use, use ReLU ;)

# Leaky ReLU
# f(x) = x if x >= 0 else a*x, with a very small (e.g. 0.01)
# improved ReLU that keeps a small gradient for negative inputs,
# which helps against "dead" neurons (the dying-ReLU problem)

# Softmax
# f(y_i) = exp(y_i) / sum_j exp(y_j)
# last layer in a multiclass classification problem

import torch
import torch.nn as nn


class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        # use nn.ModuleList (not a plain Python list) so the layers'
        # parameters are registered with the module and seen by optimizers
        self.layers = nn.ModuleList([
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            # nn.Sigmoid(),
            # nn.Softmax(dim=1),
            # nn.Tanh(),
            # nn.LeakyReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),
        ])

    def forward(self, x):
        out = x
        for layer in self.layers:
            out = layer(out)
        return out
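

# Alternative (a minimal sketch, not part of the original code): the same
# activations are also available as plain functions (torch.relu, torch.sigmoid,
# torch.nn.functional.leaky_relu, ...) that can be applied directly in
# forward() instead of creating nn.Module objects. The class name NeuralNet2
# and the sizes in the quick check below are illustrative assumptions.
import torch.nn.functional as F


class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = torch.relu(self.linear1(x))       # or F.leaky_relu(...), torch.tanh(...)
        out = torch.sigmoid(self.linear2(out))  # sigmoid output for binary classification
        return out


# quick usage check with a random batch (illustrative sizes)
if __name__ == "__main__":
    model = NeuralNet2(input_size=28 * 28, hidden_size=5)
    x = torch.randn(4, 28 * 28)
    print(model(x).shape)  # torch.Size([4, 1])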