add SlicedDataset class and utility scripts; refactor: remove _path_fix.py and update imports;

This commit is contained in:
Joseph Hopfmüller
2024-11-17 01:04:33 +01:00
parent 90aa6dbaf8
commit 87f40fc37c
7 changed files with 172 additions and 11 deletions

View File

@@ -0,0 +1,51 @@
# move into dir single-core-regen before running
from util.dataset import SlicedDataset
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
import numpy as np
def eye_dataset(dataset, no_symbols=None, offset=False, show=True):
    """Overlay power traces of dataset slices to draw eye diagrams.

    Plots |E|^2 for both polarisations of the input field (left column)
    and the output field (right column) on a shared 2x2 axes grid, with
    low alpha so repeated symbols build up the eye.

    :param dataset: SlicedDataset yielding (E_out, E_in) tensor pairs
    :param no_symbols: number of symbols to overlay (defaults to len(dataset))
    :param offset: start half a symbol into the slice to centre the eye
    :param show: call plt.show() before returning
    """
    if no_symbols is None:
        no_symbols = len(dataset)
    _, axes = plt.subplots(2, 2, sharex=True, sharey=True)
    time_axis = np.linspace(0, dataset.symbols_per_slice, dataset.samples_per_slice)
    # Optionally shift by half a symbol so transitions sit at the edges.
    start = dataset.samples_per_symbol // 2 if offset else 0
    stop = dataset.samples_per_symbol * no_symbols + start
    step = dataset.samples_per_symbol
    for field_out, field_in in dataset[start:stop:step]:
        panels = (
            (axes[0, 0], field_in[0]),
            (axes[1, 0], field_in[1]),
            (axes[0, 1], field_out[0]),
            (axes[1, 1], field_out[1]),
        )
        for ax, field in panels:
            ax.plot(time_axis, np.abs(field.numpy()) ** 2, alpha=0.05, color='C0')
    if show:
        plt.show()
# def plt_dataloader(dataloader, show=True):
# _, axs = plt.subplots(2,2, sharex=True, sharey=True)
# E_outs, E_ins = next(iter(dataloader))
# for i, (E_out, E_in) in enumerate(zip(E_outs, E_ins)):
# xaxis = np.linspace(dataset.symbols_per_slice*i,dataset.symbols_per_slice+dataset.symbols_per_slice*i,dataset.samples_per_slice)
# E_in_x, E_in_y, E_out_x, E_out_y = E_in[0], E_in[1], E_out[0], E_out[1]
# axs[0,0].plot(xaxis, np.abs(E_in_x.numpy())**2)
# axs[1,0].plot(xaxis, np.abs(E_in_y.numpy())**2)
# axs[0,1].plot(xaxis, np.abs(E_out_x.numpy())**2)
# axs[1,1].plot(xaxis, np.abs(E_out_y.numpy())**2)
# if show:
# plt.show()
if __name__ == "__main__":
    # Build a dataset of 1-symbol slices from the simulated trace,
    # dropping the first 100 symbols (presumably start-up transients —
    # TODO confirm with the data-generation script).
    dataset = SlicedDataset("data/20241115-175517-128-16384-10000-0-0-17-0-PAM4-0.ini", symbols=1, drop_first=100)
    # Sanity check: shape of one output-field slice.
    print(dataset[0][0].shape)
    # Overlay 1000 symbols; offset=True centres the eye, show deferred below.
    eye_dataset(dataset, 1000, offset=True, show=False)
    train_loader = DataLoader(dataset, batch_size=10, shuffle=False)
    # plt_dataloader(train_loader, show=False)
    plt.show()

View File

@@ -0,0 +1,53 @@
from pathlib import Path
import torch
from torch.utils.data import Dataset
import numpy as np
import configparser
class SlicedDataset(Dataset):
    """Dataset of overlapping sliding windows ("slices") over a stored field trace.

    The trace is a ``[no_samples, 4]`` array loaded from the .npy file named
    in the .ini config; the 4 channels are reshaped to ``[2, 2, no_samples]``
    (channel pair 0 / channel pair 1, each with two polarisations — the
    eye-diagram script treats pair 1 as the output field and pair 0 as the
    input field). Each item is a tuple of two ``[2, samples_per_slice]``
    tensors.
    """

    def __init__(self, config_path, symbols, drop_first=0):
        """
        Initialize the dataset.

        :param config_path: Path to the configuration (.ini) file
        :type config_path: str
        :param symbols: Length of each slice in symbols
        :type symbols: int
        :param drop_first: Number of leading symbols to discard from the trace
        :type drop_first: int
        """
        self.config = configparser.ConfigParser()
        self.config.read(Path(config_path))
        # Config values carry literal surrounding quotes; strip them.
        data_cfg = self.config['data']
        self.data_path = (
            Path(data_cfg['dir'].strip('"'))
            / data_cfg['npy_dir'].strip('"')
            / data_cfg['file'].strip('"')
        )
        self.symbols_per_slice = symbols
        self.samples_per_symbol = int(self.config['glova']['sps'])
        self.samples_per_slice = self.symbols_per_slice * self.samples_per_symbol
        # [no_samples, 4] -> [4, no_samples] -> [2, 2, no_samples]
        data_raw = torch.tensor(np.load(self.data_path))[drop_first * self.samples_per_symbol:]
        data_raw = data_raw.transpose(0, 1)
        data_raw = data_raw.view(2, 2, -1)
        # Sliding windows of samples_per_slice samples, stride 1 sample:
        # [2, 2, no_samples] -> [2, 2, no_slices, samples_per_slice]
        self.data = data_raw.unfold(dimension=-1, size=self.samples_per_slice, step=1)
        # -> [no_slices, 2, 2, samples_per_slice]
        self.data = self.data.movedim(-2, 0)

    def __len__(self):
        """Number of available (overlapping) slices."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(E_out, E_in)`` for an integer index, or a list of such
        tuples when ``idx`` is a slice object (supports stepped slicing)."""
        if isinstance(idx, slice):
            return [self[i] for i in range(*idx.indices(len(self)))]
        return (self.data[idx, 1].squeeze(), self.data[idx, 0].squeeze())
if __name__ == "__main__":
    # Module is import-only; no demo/CLI entry point.
    pass