# Build standardized train/test TensorDatasets from the Boston housing data.
#
# NOTE(review): `load_boston` was removed from scikit-learn in version 1.2,
# so this module needs an older scikit-learn release to import.

import torch
import numpy as np
from torch.utils.data import TensorDataset
from sklearn.datasets import load_boston

## Load data in a numpy array

# The bunch object exposes the features under 'data' and the regression
# target under 'target'.
data_bunch = load_boston()

# Input features with the second-to-last column (the 'B' column) removed;
# every other column keeps its original order.
cut_x = np.delete(data_bunch['data'], -2, axis=1)
x = cut_x

# Output values.
y = data_bunch['target']

# Standardize the target and each feature column to zero mean / unit std.
# The statistics are taken over the full dataset, i.e. before the
# train/test split below, so test rows contribute to them.
y_mean = y.mean()
y_std = y.std()
y = (y - y_mean) / y_std

x_mean = x.mean(axis=0, keepdims=True)
x_std = x.std(axis=0, keepdims=True)
x = (x - x_mean) / x_std

# Random train/test split: 20% of the rows are held out for testing.
length_data = y.shape[0]
test_percentage = 0.2
length_test_data = int(length_data * test_percentage)

full_indices = np.arange(length_data)

# Seed NumPy's global RNG so the same rows are selected on every run.
np.random.seed(0)
test_indices = np.random.choice(
    full_indices,
    size=length_test_data,
    replace=False,
)
# Every index that was not drawn for testing goes to training (sorted).
train_indices = np.setdiff1d(full_indices, test_indices)

# Convert each split to float32 tensors; the targets stay 1-D here.
train_x = torch.tensor(x[train_indices], dtype=torch.float32)
train_y = torch.tensor(y[train_indices], dtype=torch.float32)
test_x = torch.tensor(x[test_indices], dtype=torch.float32)
test_y = torch.tensor(y[test_indices], dtype=torch.float32)

# Create datasets; targets get a trailing axis so each sample's label has
# shape (1,).
train_data = TensorDataset(train_x, train_y.unsqueeze(1))
test_data = TensorDataset(test_x, test_y.unsqueeze(1))