add SlicedDataset class and utility scripts; refactor: remove _path_fix.py and update imports;

This commit is contained in:
Joseph Hopfmüller
2024-11-17 01:04:33 +01:00
parent 90aa6dbaf8
commit 87f40fc37c
7 changed files with 172 additions and 11 deletions

View File

@@ -0,0 +1,51 @@
# move into dir single-core-regen before running
from util.dataset import SlicedDataset
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
import numpy as np
def eye_dataset(dataset, no_symbols=None, offset=False, show=True):
    """Overlay power traces of dataset slices to draw eye diagrams.

    Plots |E|^2 for both polarisations of the input field (left column)
    and the output field (right column) on a shared 2x2 axes grid, with
    low alpha so repeated symbols build up the eye.

    :param dataset: SlicedDataset yielding (E_out, E_in) tensor pairs
    :param no_symbols: number of symbols to overlay (defaults to len(dataset))
    :param offset: start half a symbol into the slice to centre the eye
    :param show: call plt.show() before returning
    """
    if no_symbols is None:
        no_symbols = len(dataset)
    _, axes = plt.subplots(2, 2, sharex=True, sharey=True)
    time_axis = np.linspace(0, dataset.symbols_per_slice, dataset.samples_per_slice)
    # Optionally shift by half a symbol so transitions sit at the edges.
    start = dataset.samples_per_symbol // 2 if offset else 0
    stop = dataset.samples_per_symbol * no_symbols + start
    step = dataset.samples_per_symbol
    for field_out, field_in in dataset[start:stop:step]:
        panels = (
            (axes[0, 0], field_in[0]),
            (axes[1, 0], field_in[1]),
            (axes[0, 1], field_out[0]),
            (axes[1, 1], field_out[1]),
        )
        for ax, field in panels:
            ax.plot(time_axis, np.abs(field.numpy()) ** 2, alpha=0.05, color='C0')
    if show:
        plt.show()
# def plt_dataloader(dataloader, show=True):
# _, axs = plt.subplots(2,2, sharex=True, sharey=True)
# E_outs, E_ins = next(iter(dataloader))
# for i, (E_out, E_in) in enumerate(zip(E_outs, E_ins)):
# xaxis = np.linspace(dataset.symbols_per_slice*i,dataset.symbols_per_slice+dataset.symbols_per_slice*i,dataset.samples_per_slice)
# E_in_x, E_in_y, E_out_x, E_out_y = E_in[0], E_in[1], E_out[0], E_out[1]
# axs[0,0].plot(xaxis, np.abs(E_in_x.numpy())**2)
# axs[1,0].plot(xaxis, np.abs(E_in_y.numpy())**2)
# axs[0,1].plot(xaxis, np.abs(E_out_x.numpy())**2)
# axs[1,1].plot(xaxis, np.abs(E_out_y.numpy())**2)
# if show:
# plt.show()
if __name__ == "__main__":
    # Build a dataset of 1-symbol slices from the simulated trace,
    # dropping the first 100 symbols (presumably start-up transients —
    # TODO confirm with the data-generation script).
    dataset = SlicedDataset("data/20241115-175517-128-16384-10000-0-0-17-0-PAM4-0.ini", symbols=1, drop_first=100)
    # Sanity check: shape of one output-field slice.
    print(dataset[0][0].shape)
    # Overlay 1000 symbols; offset=True centres the eye, show deferred below.
    eye_dataset(dataset, 1000, offset=True, show=False)
    train_loader = DataLoader(dataset, batch_size=10, shuffle=False)
    # plt_dataloader(train_loader, show=False)
    plt.show()

View File

@@ -0,0 +1,53 @@
from pathlib import Path
import torch
from torch.utils.data import Dataset
import numpy as np
import configparser
class SlicedDataset(Dataset):
    """Dataset of overlapping sliding windows ("slices") over a stored field trace.

    The trace is a ``[no_samples, 4]`` array loaded from the .npy file named
    in the .ini config; the 4 channels are reshaped to ``[2, 2, no_samples]``
    (channel pair 0 / channel pair 1, each with two polarisations — the
    eye-diagram script treats pair 1 as the output field and pair 0 as the
    input field). Each item is a tuple of two ``[2, samples_per_slice]``
    tensors.
    """

    def __init__(self, config_path, symbols, drop_first=0):
        """
        Initialize the dataset.

        :param config_path: Path to the configuration (.ini) file
        :type config_path: str
        :param symbols: Length of each slice in symbols
        :type symbols: int
        :param drop_first: Number of leading symbols to discard from the trace
        :type drop_first: int
        """
        self.config = configparser.ConfigParser()
        self.config.read(Path(config_path))
        # Config values carry literal surrounding quotes; strip them.
        data_cfg = self.config['data']
        self.data_path = (
            Path(data_cfg['dir'].strip('"'))
            / data_cfg['npy_dir'].strip('"')
            / data_cfg['file'].strip('"')
        )
        self.symbols_per_slice = symbols
        self.samples_per_symbol = int(self.config['glova']['sps'])
        self.samples_per_slice = self.symbols_per_slice * self.samples_per_symbol
        # [no_samples, 4] -> [4, no_samples] -> [2, 2, no_samples]
        data_raw = torch.tensor(np.load(self.data_path))[drop_first * self.samples_per_symbol:]
        data_raw = data_raw.transpose(0, 1)
        data_raw = data_raw.view(2, 2, -1)
        # Sliding windows of samples_per_slice samples, stride 1 sample:
        # [2, 2, no_samples] -> [2, 2, no_slices, samples_per_slice]
        self.data = data_raw.unfold(dimension=-1, size=self.samples_per_slice, step=1)
        # -> [no_slices, 2, 2, samples_per_slice]
        self.data = self.data.movedim(-2, 0)

    def __len__(self):
        """Number of available (overlapping) slices."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(E_out, E_in)`` for an integer index, or a list of such
        tuples when ``idx`` is a slice object (supports stepped slicing)."""
        if isinstance(idx, slice):
            return [self[i] for i in range(*idx.indices(len(self)))]
        return (self.data[idx, 1].squeeze(), self.data[idx, 0].squeeze())
if __name__ == "__main__":
    # Module is import-only; no demo/CLI entry point.
    pass