update dataset configurations, add rotation module, and refine model settings for training; new hyperparameter tuning run for corrected datasets
@@ -1,6 +1,8 @@
+from datetime import datetime
+
 import optuna
 import torch
 import util
 from hypertraining.hypertraining import HyperTraining
 from hypertraining.settings import (
     GlobalSettings,
@@ -16,24 +18,29 @@ global_settings = GlobalSettings(
 )

 data_settings = DataSettings(
-    config_path="data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
+    # config_path="data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
+    config_path="data/20241204-131003-128-16384-100000-0-0-17-0-PAM4-0.ini",
     dtype="complex64",
     # symbols = (9, 20), # 13 symbol @ 10GBd <-> 1.3ns <-> 0.26m of fiber
-    symbols=13, # study: single_core_regen_20241123_011232
+    # symbols=13, # study: single_core_regen_20241123_011232
+    # symbols = (3, 13),
+    symbols=4,
     # output_size = (11, 32), # ballpark 26 taps -> 2 taps per input symbol -> 1 tap every 0.01m (model has 52 inputs)
-    output_size=26, # study: single_core_regen_20241123_011232 (model_input_dim/2)
+    # output_size=26, # study: single_core_regen_20241123_011232 (model_input_dim/2)
+    output_size=(8, 30),
     shuffle=True,
     in_out_delay=0,
     xy_delay=0,
-    drop_first=128 * 100,
+    drop_first=256,
     train_split=0.8,
+    randomise_polarisations=False,
 )
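A note on the range-valued fields: a bare value such as symbols=4 appears to stay fixed for the run, while a (low, high) tuple such as output_size=(8, 30) marks a dimension for the Optuna study to search. A minimal sketch of that convention in plain Optuna (the objective body is an illustrative stand-in, not code from this repo):

import optuna

def objective(trial: optuna.Trial) -> float:
    symbols = 4                                            # bare scalar: fixed
    output_size = trial.suggest_int("output_size", 8, 30)  # tuple: sampled per trial
    # Stand-in loss so the sketch runs end to end; the real study minimises model MSE.
    return float(abs(output_size - 2 * symbols))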

 pytorch_settings = PytorchSettings(
-    epochs=10000,
+    epochs=10,
     batchsize=2**10,
     device="cuda",
-    dataloader_workers=12,
+    dataloader_workers=4,
     dataloader_prefetch=4,
     summary_dir=".runs",
     write_every=2**5,
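The loader fields presumably map onto torch.utils.data.DataLoader arguments of the same meaning; a sketch of that assumed mapping with a throwaway dataset (the 52-wide input follows the "model has 52 inputs" comment above):

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(2**12, 52), torch.randn(2**12, 2))
loader = DataLoader(
    dataset,
    batch_size=2**10,   # batchsize
    shuffle=True,       # shuffle from DataSettings
    num_workers=4,      # dataloader_workers
    prefetch_factor=4,  # dataloader_prefetch; only valid with num_workers > 0
)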
@@ -43,28 +50,70 @@ pytorch_settings = PytorchSettings(

 model_settings = ModelSettings(
     output_dim=2,
     # n_hidden_layers = (3, 8),
-    n_hidden_layers=4,
-    overrides={
-        "n_hidden_nodes_0": 8,
-        "n_hidden_nodes_1": 6,
-        "n_hidden_nodes_2": 4,
-        "n_hidden_nodes_3": 8,
-    },
-    model_activation_func="Mag",
-    # satabsT0=(1e-6, 1),
+    n_hidden_layers = (2, 5),
+    n_hidden_nodes=(2, 16),
+    model_activation_func="EOActivation",
     dropout_prob=0,
+    model_layer_function="ONNRect",
+    model_layer_kwargs={"square": True},
     # scale=(False, True),
     scale=False,
+    model_layer_parametrizations=[
+        {
+            "tensor_name": "weight",
+            "parametrization": util.complexNN.energy_conserving,
+        },
+        {
+            "tensor_name": "alpha",
+            "parametrization": util.complexNN.clamp,
+        },
+        {
+            "tensor_name": "gain",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": 0,
+                "max": float("inf"),
+            },
+        },
+        {
+            "tensor_name": "phase_bias",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": 0,
+                "max": 2 * torch.pi,
+            },
+        },
+        {
+            "tensor_name": "scales",
+            "parametrization": util.complexNN.clamp,
+        },
+        {
+            "tensor_name": "angle",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": -torch.pi,
+                "max": torch.pi,
+            },
+        },
+        {
+            "tensor_name": "loss",
+            "parametrization": util.complexNN.clamp,
+        },
+    ],
 )
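The new model_layer_parametrizations list pairs named layer tensors with constraint functions. util.complexNN.clamp is not shown in this diff; assuming it follows PyTorch's parametrization protocol, the mechanism is roughly the following sketch (the Clamp module and the Linear layer are illustrative, not code from this repo):

import torch
from torch import nn
from torch.nn.utils import parametrize

class Clamp(nn.Module):
    # Re-clamps the underlying tensor into [min, max] every time it is read.
    def __init__(self, min=0.0, max=1.0):
        super().__init__()
        self.min, self.max = min, max

    def forward(self, x):
        return x.clamp(self.min, self.max)

layer = nn.Linear(4, 4)
layer.phase_bias = nn.Parameter(torch.rand(4) * 10.0)  # tensor name taken from the list above
parametrize.register_parametrization(layer, "phase_bias", Clamp(0.0, 2 * torch.pi))
assert layer.phase_bias.max() <= 2 * torch.pi  # the constraint holds on every access

util.complexNN.energy_conserving presumably constrains the weight matrix analogously; only the clamp pattern is sketched here.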

 optimizer_settings = OptimizerSettings(
-    optimizer="Adam",
     # learning_rate = (1e-5, 1e-1),
-    learning_rate=5e-3
-    # learning_rate=5e-4,
+    optimizer="AdamW",
+    optimizer_kwargs={
+        "lr": 5e-3,
+        "amsgrad": True,
+        # "weight_decay": 1e-7,
+    },
 )
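Replacing the bare learning_rate field with an optimizer_kwargs dict suggests the kwargs are forwarded unchanged to the torch.optim constructor. A sketch of the equivalent direct call, assuming that forwarding (the getattr dispatch is an assumption, not code from this repo):

import torch

model = torch.nn.Linear(8, 2)
optimizer_cls = getattr(torch.optim, "AdamW")  # dispatch on the optimizer name
optimizer = optimizer_cls(model.parameters(), lr=5e-3, amsgrad=True)  # AMSGrad variant of AdamW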

 optuna_settings = OptunaSettings(
-    n_trials=1,
-    n_workers=1,
+    n_trials=1024,
+    n_workers=8,
+    timeout=3600,
+    directions=("minimize",),
+    metrics_names=("mse",),
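For reference, the enlarged trial budget expressed as plain Optuna calls (a sketch assuming HyperTraining forwards these fields directly and that n_workers corresponds to the n_jobs argument of study.optimize):

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -1.0, 1.0)
    return x**2  # one scalar metric, matching metrics_names=("mse",)

study = optuna.create_study(direction="minimize")  # directions=("minimize",)
study.optimize(objective, n_trials=1024, timeout=3600, n_jobs=8)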