In [1]:
import torch
from model import Encoder, ResNet18
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np

In [2]:
batch_size = 256
out_dim = 64

In [3]:
X_train = np.fromfile('data/stl10_binary/train_X.bin', dtype=np.uint8)
y_train = np.fromfile('data/stl10_binary/train_y.bin', dtype=np.uint8)

X_train = np.reshape(X_train, (-1, 3, 96, 96))
X_train = np.transpose(X_train, (0, 3, 2, 1))
print("Training images")
print(X_train.shape)
print(y_train.shape)

Training images
(5000, 96, 96, 3)
(5000,)


In [4]:
X_test = np.fromfile('data/stl10_binary/test_X.bin', dtype=np.uint8)
y_test = np.fromfile('data/stl10_binary/test_y.bin', dtype=np.uint8)

X_test = np.reshape(X_test, (-1, 3, 96, 96))
X_test = np.transpose(X_test, (0, 3, 2, 1))
print("Test images")
print(X_test.shape)
print(y_test.shape)

Test images
(8000, 96, 96, 3)
(8000,)


## Test protocol #1 PCA features

In [5]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

In [6]:
pca = PCA(n_components=out_dim)
X_train_pca = pca.fit_transform(X_train.reshape((X_train.shape[0],-1)))
X_test_pca = pca.transform(X_test.reshape((X_test.shape[0],-1)))

print("PCA features")
print(X_train_pca.shape)
print(X_test_pca.shape)

PCA features
(5000, 64)
(8000, 64)


In [7]:
clf = LogisticRegression(random_state=0).fit(X_train_pca, y_train)
print("PCA feature evaluation")
print("Train score:", clf.score(X_train_pca, y_train))
print("Test score:", clf.score(X_test_pca, y_test))

PCA feature evaluation
Train score: 0.3984
Test score: 0.353125


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
 https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
 https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [16]:
data_augment = transforms.Compose([transforms.RandomResizedCrop(96),
 transforms.ToTensor()])

train_dataset = datasets.STL10('data', split='train', download=True, transform=data_augment)
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=1, drop_last=False, shuffle=False)

Files already downloaded and verified


In [17]:
test_dataset = datasets.STL10('data', split='test', download=True, transform=data_augment)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, drop_last=False, shuffle=False)

Files already downloaded and verified


In [18]:
model = ResNet18(out_dim=out_dim)
model.eval()
print(model)

state_dict = torch.load('model/checkpoint.pth')
print(state_dict.keys())

model.load_state_dict(state_dict)

ResNet18(
 (features): Sequential(
 (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (2): ReLU(inplace=True)
 (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
 (4): Sequential(
 (0): BasicBlock(
 (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 (1): BasicBlock(
 (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stri



## Protocol #2 Linear separability evaluation

In [19]:
X_train_feature = []

for step, (batch_x, batch_y) in enumerate(train_loader):
 features, _ = model(batch_x)
 X_train_feature.extend(features.detach().numpy())
 
X_train_feature = np.array(X_train_feature)

print("Train features")
print(X_train_feature.shape)

Train features
(5000, 512)


In [20]:
X_test_feature = []

for step, (batch_x, batch_y) in enumerate(test_loader):
 features, _ = model(batch_x)
 X_test_feature.extend(features.detach().numpy())
 
X_test_feature = np.array(X_test_feature)

print("Test features")
print(X_test_feature.shape)

Test features
(8000, 512)


In [21]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train_feature, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
 https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
 https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [22]:
print("SimCLR feature evaluation")
print("Train score:", clf.score(X_train_feature, y_train))
print("Test score:", clf.score(X_test_feature, y_test))
# SimCLR feature evaluation
# Train score: 0.5298
# Test score: 0.52075

SimCLR feature evaluation
Train score: 0.7444
Test score: 0.62625
