From 0422c81f3b941fd1c3cfc49cc48ac9ea9795d1d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joseph=20Hopfm=C3=BCller?=
Date: Sun, 24 Nov 2024 01:56:01 +0100
Subject: [PATCH] update single_core_regen settings

new runs: fix symbols, output_size, hidden-layer range and activation to the
values from study single_core_regen_20241123_011232, pin dtype to complex64
and Adam at lr 5e-4, and rerun Optuna with 512 trials, objectives
(neg_log_mse, n_nodes) and a MedianPruner.
---
 data/single_core_regen.db      |  4 +-
 src/single-core-regen/regen.py | 91 +++++++++++++++++++---------------
 2 files changed, 54 insertions(+), 41 deletions(-)

diff --git a/data/single_core_regen.db b/data/single_core_regen.db
index 696de71..eee89e7 100644
--- a/data/single_core_regen.db
+++ b/data/single_core_regen.db
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7231dea2c9107f443de9122fdc971d9ce6df93db2ee27a9d68a5e22c986373eb
-size 937984
+oid sha256:f3510d41f9f0605e438a09767c43edda38162601292be1207f50747117ae5479
+size 9863168
diff --git a/src/single-core-regen/regen.py b/src/single-core-regen/regen.py
index 6f463e5..9301104 100644
--- a/src/single-core-regen/regen.py
+++ b/src/single-core-regen/regen.py
@@ -1,4 +1,6 @@
 from datetime import datetime
+
+import optuna
 from hypertraining.hypertraining import HyperTraining
 from hypertraining.settings import (
     GlobalSettings,
@@ -10,59 +12,72 @@ from hypertraining.settings import (
 )
 
 global_settings = GlobalSettings(
-    seed = 42,
+    seed=42,
 )
 
 data_settings = DataSettings(
-    config_path = "data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
-    dtype = ("complex128", "complex64", "float64", "float32"),
-    symbols = (1, 16),
-    model_input_dim = (1, 32),
-    shuffle = True,
-    in_out_delay = 0,
-    xy_delay = 0,
-    drop_first = 1000,
-    train_split = 0.8,
+    config_path="data/*-128-16384-100000-0-0-17-0-PAM4-0.ini",
+    dtype="complex64",
+    # symbols = (9, 20), # 13 symbol @ 10GBd <-> 1.3ns <-> 0.26m of fiber
+    symbols=13, # study: single_core_regen_20241123_011232
+    # output_size = (11, 32), # ballpark 26 taps -> 2 taps per input symbol -> 1 tap every 0.01m (model has 52 inputs)
+    output_size=26, # study: single_core_regen_20241123_011232 (model_input_dim/2)
+    shuffle=True,
+    in_out_delay=0,
+    xy_delay=0,
+    drop_first=128 * 100,
+    train_split=0.8,
 )
 
 pytorch_settings = PytorchSettings(
-    epochs = 25,
-    batchsize = 2**10,
-    device = "cuda",
-    dataloader_workers = 2,
-    dataloader_prefetch = 2,
-    summary_dir = ".runs",
-    write_every = 2**5,
-    model_dir = ".models",
+    epochs=10,
+    batchsize=2**10,
+    device="cuda",
+    dataloader_workers=2,
+    dataloader_prefetch=4,
+    summary_dir=".runs",
+    write_every=2**5,
+    save_models=True,
+    model_dir=".models",
 )
 
 model_settings = ModelSettings(
-    output_dim = 2,
-    model_n_layers = (2, 8),
-    unit_count = (2, 16),
-    model_activation_func = ("ModReLU")#, "ZReLU", "Mag")#, "CReLU", "Identity"),
+    output_dim=2,
+    # n_hidden_layers = (3, 8),
+    n_hidden_layers=(4, 6), # study: single_core_regen_20241123_011232
+    n_hidden_nodes=(4,20),
+    # overrides={
+    #     "n_hidden_nodes_0": (14, 20), # study: single_core_regen_20241123_011232
+    #     "n_hidden_nodes_1": (8, 16),
+    #     "n_hidden_nodes_2": (10, 16),
+    #     # "n_hidden_nodes_3": (4, 20), # study: single_core_regen_20241123_135749
+    #     "n_hidden_nodes_4": (2, 8),
+    #     "n_hidden_nodes_5": (10, 16),
+    # },
+    # model_activation_func = ("ModReLU", "Mag", "Identity")
+    model_activation_func="Mag", # study: single_core_regen_20241123_011232
 )
 
 optimizer_settings = OptimizerSettings(
-    optimizer = ("Adam", "RMSprop"),#, "SGD"),
+    optimizer="Adam",
     # learning_rate = (1e-5, 1e-1),
-    learning_rate=1e-3,
-    # scheduler = "ReduceLROnPlateau",
-    # scheduler_kwargs = {"mode": "min", "factor": 0.5, "patience": 10}
+    learning_rate=5e-4,
 )
 
 optuna_settings = OptunaSettings(
-    n_trials = 4096,
-    n_threads = 16,
-    timeout = 600,
-    directions = ("minimize","minimize"),
-    metrics_names = ("n_params","mse"),
-
-    limit_examples = True,
-    n_train_batches = 100,
-    n_valid_batches = 100,
-    storage = "sqlite:///data/single_core_regen.db",
-    study_name = f"single_core_regen_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+    n_trials=512,
+    n_workers=14,
+    timeout=3600,
+    directions=("maximize", "minimize"),
+    metrics_names=("neg_log_mse","n_nodes"),
+    limit_examples=True,
+    n_train_batches=500,
+    # n_valid_batches = 100,
+    storage="sqlite:///data/single_core_regen.db",
+    study_name=f"single_core_regen_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+    n_trials_filter=(optuna.trial.TrialState.COMPLETE, optuna.trial.TrialState.PRUNED),
+    pruner="MedianPruner",
+    pruner_kwargs=None
 )
 
 
@@ -78,8 +93,6 @@ if __name__ == "__main__":
 
     hyper_training.setup_study()
 
-    # hyper_training.resume_latest_study()
-
     hyper_training.run_study()
 
     # best_trial = hyper_training.study.best_trial
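
After this patch, a run writes a timestamped, multi-objective study into the sqlite storage configured above. Below is a minimal sketch of how that study could be inspected once a run has finished, assuming only the standard Optuna API and that HyperTraining registers the study under the configured study_name and storage; it is not part of the patched script.

import optuna

storage = "sqlite:///data/single_core_regen.db"

# Study names carry a timestamp (YYYYMMDD_HHMMSS), so the lexicographically
# last name is also the most recently created study.
study_name = sorted(s.study_name for s in optuna.get_all_study_summaries(storage=storage))[-1]
study = optuna.load_study(study_name=study_name, storage=storage)

# directions=("maximize", "minimize") makes this a multi-objective study:
# there is no single best trial, only a Pareto front over (neg_log_mse, n_nodes).
for trial in study.best_trials:
    print(trial.number, trial.values, trial.params)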