import torch

from EIVData.csv_dataset import CSVData
from torch.utils.data import random_split


def load_data(seed=0, splitting_part=0.8, normalize=True):
    """
    Loads the concrete compressive strength dataset.

    :param seed: Seed for splitting and shuffling the data. Defaults to 0.
    :param splitting_part: Which fraction of the data to use as training
        data. Defaults to 0.8.
    :param normalize: Whether to normalize the data. Defaults to True.
    :returns: concrete_trainset, concrete_testset
    """
    concrete_dataset = CSVData(
        '~/SharedData/AI/datasets/concrete_compression_strength/compressive_strength_concrete.csv',
        class_name='Concrete compressive strength(MPa, megapascals) ',
        shuffle_seed=seed,
        normalize=normalize)
    # split the shuffled dataset deterministically into train and test parts
    dataset_len = len(concrete_dataset)
    train_len = int(dataset_len * splitting_part)
    test_len = dataset_len - train_len
    concrete_trainset, concrete_testset = random_split(
        concrete_dataset,
        lengths=[train_len, test_len],
        generator=torch.Generator().manual_seed(seed))
    return concrete_trainset, concrete_testset
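

# Usage sketch (an assumption, not part of the original module): wrap the
# returned splits in torch DataLoaders for training. The batch size and the
# DataLoader settings below are illustrative choices, not values taken from
# the source.
if __name__ == '__main__':
    from torch.utils.data import DataLoader

    concrete_trainset, concrete_testset = load_data(
        seed=0, splitting_part=0.8, normalize=True)
    train_loader = DataLoader(concrete_trainset, batch_size=32, shuffle=True)
    test_loader = DataLoader(concrete_testset, batch_size=32, shuffle=False)
    # sanity check: the two splits together cover the full dataset
    print(len(concrete_trainset), len(concrete_testset))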