Update dataset configurations, add rotation module, and refine model settings for training; start a new hyperparameter tuning run for the corrected datasets

2024-12-29 16:00:36 +01:00
parent 638b62ee03
commit 98305fdf47
10 changed files with 561 additions and 305 deletions
--- a/src/single-core-regen/regen.py
+++ b/src/single-core-regen/regen.py
@@ -1,6 +1,8 @@
 from datetime import datetime

 import optuna
+import torch
+import util
 from hypertraining.hypertraining import HyperTraining
 from hypertraining.settings import (
    GlobalSettings,
@@ -16,24 +18,29 @@ global_settings = GlobalSettings(
 )

 data_settings = DataSettings(
-    config_path="data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
+    # config_path="data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
+    config_path="data/20241204-131003-128-16384-100000-0-0-17-0-PAM4-0.ini",
    dtype="complex64",
    # symbols         = (9, 20), # 13 symbol @ 10GBd <-> 1.3ns <-> 0.26m of fiber
-    symbols=13,  # study: single_core_regen_20241123_011232
+    # symbols=13,  # study: single_core_regen_20241123_011232
+    # symbols = (3, 13),
+    symbols=4,
    # output_size     = (11, 32), # ballpark 26 taps -> 2 taps per input symbol -> 1 tap every 0.01m (model has 52 inputs)
-    output_size=26,  # study: single_core_regen_20241123_011232 (model_input_dim/2)
+    # output_size=26,  # study: single_core_regen_20241123_011232 (model_input_dim/2)
+    output_size=(8, 30),
    shuffle=True,
    in_out_delay=0,
    xy_delay=0,
-    drop_first=128 * 100,
+    drop_first=256,
    train_split=0.8,
+    randomise_polarisations=False,
 )

 pytorch_settings = PytorchSettings(
-    epochs=10000,
+    epochs=10,
    batchsize=2**10,
    device="cuda",
-    dataloader_workers=12,
+    dataloader_workers=4,
    dataloader_prefetch=4,
    summary_dir=".runs",
    write_every=2**5,
@@ -43,28 +50,70 @@ pytorch_settings = PytorchSettings(

 model_settings = ModelSettings(
    output_dim=2,
-    # n_hidden_layers = (3, 8),
-    n_hidden_layers=4,
-    overrides={
-        "n_hidden_nodes_0": 8,
-        "n_hidden_nodes_1": 6,
-        "n_hidden_nodes_2": 4,
-        "n_hidden_nodes_3": 8,
-    },
-    model_activation_func="Mag",
-    # satabsT0=(1e-6, 1),
+    n_hidden_layers = (2, 5),
+    n_hidden_nodes=(2, 16),
+    model_activation_func="EOActivation",
+    dropout_prob=0,
+    model_layer_function="ONNRect",
+    model_layer_kwargs={"square": True},
+    # scale=(False, True),
+    scale=False,
+    model_layer_parametrizations=[
+        {
+            "tensor_name": "weight",
+            "parametrization": util.complexNN.energy_conserving,
+        },
+        {
+            "tensor_name": "alpha",
+            "parametrization": util.complexNN.clamp,
+        },
+        {
+            "tensor_name": "gain",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": 0,
+                "max": float("inf"),
+            },
+        },
+        {
+            "tensor_name": "phase_bias",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": 0,
+                "max": 2 * torch.pi,
+            },
+        },
+        {
+            "tensor_name": "scales",
+            "parametrization": util.complexNN.clamp,
+        },
+        {
+            "tensor_name": "angle",
+            "parametrization": util.complexNN.clamp,
+            "kwargs": {
+                "min": -torch.pi,
+                "max": torch.pi,
+            },
+        },
+        {
+            "tensor_name": "loss",
+            "parametrization": util.complexNN.clamp,
+        },
+    ],
 )

 optimizer_settings = OptimizerSettings(
-    optimizer="Adam",
-    # learning_rate       = (1e-5, 1e-1),
-    learning_rate=5e-3
-    # learning_rate=5e-4,
+    optimizer="AdamW",
+    optimizer_kwargs={
+        "lr": 5e-3,
+        "amsgrad": True,
+        # "weight_decay": 1e-7,
+    },    
 )

 optuna_settings = OptunaSettings(
-    n_trials=1,
-    n_workers=1,
+    n_trials=1024,
+    n_workers=8,
    timeout=3600,
    directions=("minimize",),
    metrics_names=("mse",),