"Open

In [0]:
import torch
import sys
import numpy as np
import os
from sklearn.neighbors import KNeighborsClassifier
import yaml
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
import importlib.util

In [0]:
!pip install gdown

In [0]:
def get_file_id_by_model(folder_name):
    """Look up the Google Drive file id of a pretrained checkpoint archive.

    Args:
        folder_name: checkpoint name, e.g. ``'resnet-50_40-epochs'``.

    Returns:
        The file id string for a known name, otherwise the literal string
        ``"Model not found."`` (no exception is raised for unknown names).
    """
    known_checkpoints = {
        'resnet-18_40-epochs': '1c4eVon0sUd-ChVhH6XMpF6nCngNJsAPk',
        'resnet-18_80-epochs': '1L0yoeY9i2mzDcj69P4slTWb-cfr3PyoT',
        'resnet-50_40-epochs': '1TZqBNTFCsO-mxAiR-zJeyupY-J2gA27Q',
        'resnet-50_80-epochs': '1is1wkBRccHdhSKQnPUTQoaFkVNSaCb35',
        'resnet-18_100-epochs': '1aZ12TITXnajZ6QWmS_SDm8Sp8gXNbeCQ',
    }
    if folder_name in known_checkpoints:
        return known_checkpoints[folder_name]
    return "Model not found."

In [0]:
# Choose which pretrained checkpoint to evaluate; the name doubles as the
# directory later cells read the checkpoint files from.
folder_name = 'resnet-50_40-epochs'
# Unknown names do not raise: the lookup returns the string "Model not found.".
file_id = get_file_id_by_model(folder_name)
print(folder_name, file_id)

In [0]:
# download and extract model files
os.system('gdown https://drive.google.com/uc?id={}'.format(file_id))
os.system('unzip {}'.format(folder_name))
!ls

In [0]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets

In [0]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using device:", device)

In [0]:
checkpoints_folder = os.path.join(folder_name, 'checkpoints')
# Calling yaml.load without an explicit Loader is deprecated since PyYAML 5.1
# and is rejected outright by PyYAML 6. The config comes from a downloaded
# archive, so it is parsed with safe_load, which only builds plain Python
# types. NOTE(review): if a config ever relies on Python-specific YAML tags,
# switch to yaml.load(..., Loader=yaml.FullLoader) after vetting the file.
# The `with` block also closes the file handle the original left open.
with open(os.path.join(checkpoints_folder, "config.yaml"), "r") as config_file:
    config = yaml.safe_load(config_file)
config

In [0]:
def get_stl10_data_loaders(download, shuffle=False, batch_size=128):
    """Build DataLoaders over the STL10 'train' and 'test' splits.

    Both datasets are rooted at './data' and use a plain ``ToTensor``
    transform; both loaders share the same ``batch_size`` and ``shuffle``
    settings, run in the main process (``num_workers=0``) and keep the last
    partial batch.

    Args:
        download: forwarded to ``datasets.STL10`` for both splits.
        shuffle: whether the loaders shuffle their samples.
        batch_size: number of samples per batch.

    Returns:
        ``(train_loader, test_loader)``.
    """
    loaders = []
    # Train first, then test, matching the order of the returned tuple.
    for split_name in ('train', 'test'):
        split_dataset = datasets.STL10(
            './data',
            split=split_name,
            download=download,
            transform=transforms.ToTensor(),
        )
        split_loader = DataLoader(
            split_dataset,
            batch_size=batch_size,
            num_workers=0,
            drop_last=False,
            shuffle=shuffle,
        )
        loaders.append(split_loader)

    train_loader, test_loader = loaders
    return train_loader, test_loader

In [0]:
def _load_resnet_model(checkpoints_folder):
    """Instantiate the checkpointed ResNetSimCLR network and load its weights.

    The model class is imported from the 'resnet_simclr.py' file stored next
    to the checkpoint, built from the module-level ``config['model']``
    settings, switched to eval mode, populated from 'model.pth' and moved to
    the module-level ``device``.

    Args:
        checkpoints_folder: directory holding 'resnet_simclr.py' and 'model.pth'.

    Returns:
        The loaded model, in eval mode, on ``device``.
    """
    # Import the network definition straight from the checkpoint directory.
    module_path = os.path.join(checkpoints_folder, 'resnet_simclr.py')
    module_spec = importlib.util.spec_from_file_location("model", module_path)
    simclr_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(simclr_module)

    network = simclr_module.ResNetSimCLR(**config['model'])
    network.eval()

    # Weights are read onto the CPU first, then the whole model is moved.
    weights_path = os.path.join(checkpoints_folder, 'model.pth')
    weights = torch.load(weights_path, map_location=torch.device('cpu'))
    network.load_state_dict(weights)
    return network.to(device)

## Protocol #2 Logistic Regression

In [0]:
class ResNetFeatureExtractor(object):
    """Runs the checkpointed ResNetSimCLR model over STL10 and collects features.

    The model is loaded once, at construction time, via ``_load_resnet_model``.
    """

    def __init__(self, checkpoints_folder):
        """Load the model stored under ``checkpoints_folder``.

        Args:
            checkpoints_folder: directory handed to ``_load_resnet_model``.
        """
        self.checkpoints_folder = checkpoints_folder
        self.model = _load_resnet_model(checkpoints_folder)

    def _inference(self, loader):
        """Encode every batch produced by ``loader``.

        Args:
            loader: iterable yielding ``(batch_x, batch_y)`` pairs; ``batch_x``
                is moved to the module-level ``device`` before the forward pass.

        Returns:
            ``(feature_vector, labels_vector)`` as numpy arrays holding the
            first model output and the labels of all batches, concatenated
            along the first axis. Both are empty arrays for an empty loader.
        """
        feature_batches = []
        label_batches = []

        # Pure inference: without no_grad each forward pass would record an
        # autograd graph that is discarded right away, wasting memory.
        with torch.no_grad():
            for batch_x, batch_y in loader:
                batch_x = batch_x.to(device)

                # The model returns a pair; only the first element is kept.
                features, _ = self.model(batch_x)
                feature_batches.append(features.detach().cpu().numpy())

                if torch.is_tensor(batch_y):
                    label_batches.append(batch_y.detach().cpu().numpy())
                else:
                    # Keep accepting plain sequences of labels.
                    label_batches.append(np.asarray(batch_y))

        if feature_batches:
            feature_vector = np.concatenate(feature_batches)
            labels_vector = np.concatenate(label_batches)
        else:
            # np.concatenate rejects an empty list; mirror np.array([]).
            feature_vector = np.array([])
            labels_vector = np.array([])

        print("Features shape {}".format(feature_vector.shape))
        return feature_vector, labels_vector

    def get_resnet_features(self):
        """Extract features for the STL10 train and test splits.

        The dataset is downloaded if needed (``download=True``).

        Returns:
            ``(X_train_feature, y_train, X_test_feature, y_test)``.
        """
        train_loader, test_loader = get_stl10_data_loaders(download=True)
        X_train_feature, y_train = self._inference(train_loader)
        X_test_feature, y_test = self._inference(test_loader)

        return X_train_feature, y_train, X_test_feature, y_test

In [0]:
# Constructing the extractor loads the model from checkpoints_folder
# (the constructor calls _load_resnet_model).
resnet_feature_extractor = ResNetFeatureExtractor(checkpoints_folder)

In [0]:
# Runs the model over the STL10 train and test splits (downloading the data if
# needed) and returns numpy feature and label arrays for each split.
X_train_feature, y_train, X_test_feature, y_test = resnet_feature_extractor.get_resnet_features()

In [0]:
import torch.nn as nn

class LogisticRegression(nn.Module):
    """A single linear layer that maps feature vectors to class logits.

    NOTE: this name shadows the ``LogisticRegression`` imported from
    ``sklearn.linear_model`` at the top of the notebook; after this cell runs,
    the name refers to this PyTorch module.
    """

    def __init__(self, n_features, n_classes):
        """
        Args:
            n_features: size of each input feature vector.
            n_classes: number of output logits.
        """
        super().__init__()
        linear_layer = nn.Linear(n_features, n_classes)
        # Attribute name is part of the state_dict keys ('model.weight', ...).
        self.model = linear_layer

    def forward(self, x):
        """Return the raw (un-normalized) logits for ``x``."""
        logits = self.model(x)
        return logits

In [0]:
class LogiticRegressionEvaluator(object):
    """Trains and scores a linear classifier on pre-computed feature arrays.

    Features are standardized with a ``StandardScaler`` fitted on the training
    split. The classifier is the ``LogisticRegression`` module, placed on the
    module-level ``device``.
    """

    def __init__(self, n_features, n_classes):
        """
        Args:
            n_features: dimensionality of each feature vector.
            n_classes: number of target classes.
        """
        self.log_regression = LogisticRegression(n_features, n_classes).to(device)
        self.scaler = preprocessing.StandardScaler()

    def _normalize_dataset(self, X_train, X_test):
        """Fit the scaler on ``X_train`` and apply it to both splits."""
        print("Standard Scaling Normalizer")
        self.scaler.fit(X_train)
        X_train = self.scaler.transform(X_train)
        X_test = self.scaler.transform(X_test)
        return X_train, X_test

    @staticmethod
    def _sample_weight_decay():
        """Draw one L2 regularization strength at random.

        The value is picked uniformly from 45 logarithmically spaced
        candidates between 10^-6 and 10^5, using numpy's global random state.
        """
        candidates = np.logspace(-6, 5, num=45, base=10.0)
        weight_decay = np.random.choice(candidates)
        print("Sampled weight decay:", weight_decay)
        return weight_decay

    def eval(self, test_loader):
        """Return the classifier's accuracy, in percent, over ``test_loader``.

        The classifier is put in eval mode for the duration of the call and
        switched back to train mode afterwards, even if scoring fails.
        Returns ``0.0`` when the loader yields no samples.
        """
        correct = 0
        total = 0

        self.log_regression.eval()
        try:
            with torch.no_grad():
                for batch_x, batch_y in test_loader:
                    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                    logits = self.log_regression(batch_x)

                    predicted = torch.argmax(logits, dim=1)
                    total += batch_y.size(0)
                    correct += (predicted == batch_y).sum().item()
        finally:
            self.log_regression.train()

        if total == 0:
            # Avoid a ZeroDivisionError on an empty loader.
            return 0.0
        return 100 * correct / total

    def create_data_loaders_from_arrays(self, X_train, y_train, X_test, y_test,
                                        train_batch_size=396, test_batch_size=512):
        """Standardize the features and wrap both splits in DataLoaders.

        Args:
            X_train, X_test: numpy feature arrays.
            y_train, y_test: numpy label arrays (converted to ``torch.long``).
            train_batch_size: batch size of the training loader.
            test_batch_size: batch size of the test loader.

        Returns:
            ``(train_loader, test_loader)``; neither loader shuffles.
        """
        X_train, X_test = self._normalize_dataset(X_train, X_test)

        # Cast to float32 so the inputs match the linear layer's parameters;
        # float64 features would otherwise fail in the forward pass.
        train = torch.utils.data.TensorDataset(
            torch.from_numpy(X_train).float(),
            torch.from_numpy(y_train).type(torch.long))
        train_loader = torch.utils.data.DataLoader(train, batch_size=train_batch_size, shuffle=False)

        test = torch.utils.data.TensorDataset(
            torch.from_numpy(X_test).float(),
            torch.from_numpy(y_test).type(torch.long))
        test_loader = torch.utils.data.DataLoader(test, batch_size=test_batch_size, shuffle=False)
        return train_loader, test_loader

    def train(self, X_train, y_train, X_test, y_test,
              epochs=200, learning_rate=3e-4, weight_decay=None):
        """Fit the classifier with Adam and report the best test accuracy.

        After every epoch the classifier is scored on the test split; each time
        the accuracy improves, the weights are saved to 'log_regression.pth'.
        NOTE: the best epoch is chosen on the same split that is reported, so
        the printed accuracy is an optimistic estimate.

        Args:
            X_train, y_train: training features and labels (numpy arrays).
            X_test, y_test: test features and labels (numpy arrays).
            epochs: number of passes over the training data.
            learning_rate: Adam learning rate.
            weight_decay: L2 regularization strength; when ``None`` a value is
                sampled with ``_sample_weight_decay``.
        """
        train_loader, test_loader = self.create_data_loaders_from_arrays(X_train, y_train, X_test, y_test)

        if weight_decay is None:
            weight_decay = self._sample_weight_decay()

        optimizer = torch.optim.Adam(self.log_regression.parameters(), learning_rate, weight_decay=weight_decay)
        criterion = torch.nn.CrossEntropyLoss()

        best_accuracy = 0

        for e in range(epochs):

            for batch_x, batch_y in train_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                optimizer.zero_grad()
                logits = self.log_regression(batch_x)
                loss = criterion(logits, batch_y)
                loss.backward()
                optimizer.step()

            epoch_acc = self.eval(test_loader)

            if epoch_acc > best_accuracy:
                best_accuracy = epoch_acc
                torch.save(self.log_regression.state_dict(), 'log_regression.pth')

        print("--------------")
        print("Done training")
        print("Best accuracy:", best_accuracy)

In [0]:
# The input width is taken from the extracted features; n_classes=10 is
# hard-coded (presumably the STL10 label count — confirm if the dataset changes).
log_regressor_evaluator = LogiticRegressionEvaluator(n_features=X_train_feature.shape[1], n_classes=10)

# Trains the classifier, prints the best test accuracy and saves the best
# weights to 'log_regression.pth'.
log_regressor_evaluator.train(X_train_feature, y_train, X_test_feature, y_test)