From 6cd188ef686261b10728d97a1aa312b27e437dbf Mon Sep 17 00:00:00 2001
From: Thalles Silva
Date: Mon, 18 Jan 2021 07:33:12 -0300
Subject: [PATCH] Major refactor, small fixes

---
 README.md | 75 +++++++++----------------------------------------------
 run.py    |  4 +--
 simclr.py | 14 +++++------
 3 files changed, 19 insertions(+), 74 deletions(-)

diff --git a/README.md b/README.md
index fa98d69..b6f25b6 100644
--- a/README.md
+++ b/README.md
@@ -16,78 +16,27 @@ $ python run.py
 
 ## Config file
 
-Before running SimCLR, make sure you choose the correct running configurations on the ```config.yaml``` file.
+Before running SimCLR, make sure you choose the correct running configuration. You can change the configuration by passing keyword arguments to ```run.py```.
 
-```yaml
+```bash
 
-# A batch size of N, produces 2 * (N-1) negative samples. Original implementation uses a batch size of 8192
-batch_size: 512
+$ python run.py -data ./datasets --dataset-name stl10 --log-every-n-steps 100 --epochs 100
 
-# Number of epochs to train
-epochs: 40
-
-# Frequency to eval the similarity score using the validation set
-eval_every_n_epochs: 1
-
-# Specify a folder containing a pre-trained model to fine-tune. If training from scratch, pass None.
-fine_tune_from: 'resnet-18_80-epochs'
-
-# Frequency to which tensorboard is updated
-log_every_n_steps: 50
-
-# l2 Weight decay magnitude, original implementation uses 10e-6
-weight_decay: 10e-6
-
-# if True, training is done using mixed precision. Apex needs to be installed in this case.
-fp16_precision: False
-
-# Model related parameters
-model:
-  # Output dimensionality of the embedding vector z. Original implementation uses 2048
-  out_dim: 256
-
-  # The ConvNet base model. Choose one of: "resnet18" or "resnet50". Original implementation uses resnet50
-  base_model: "resnet18"
-
-# Dataset related parameters
-dataset:
-  s: 1
-
-  # dataset input shape. For datasets containing images of different size, this defines the final
-  input_shape: (96,96,3)
-
-  # Number of workers for the data loader
-  num_workers: 0
-
-  # Size of the validation set in percentage
-  valid_size: 0.05
-
-# NTXent loss related parameters
-loss:
-  # Temperature parameter for the contrastive objective
-  temperature: 0.5
-
-  # Distance metric for contrastive loss. If False, uses dot product. Original implementation uses cosine similarity.
-  use_cosine_similarity: True
 ```
 
+If you want to run it on the CPU (for debugging purposes), use the ```--disable-cuda``` option.
+
 ## Feature Evaluation
 
 Feature evaluation is done using a linear model protocol.
 
-Features are learned using the ```STL10 train+unsupervised``` set and evaluated in the ```test``` set;
+First, we learn features using SimCLR on the ```STL10 unsupervised``` set. Then, we train a linear classifier on top of the frozen features from SimCLR. The linear model is trained on features extracted from the ```STL10 train``` set and evaluated on the ```STL10 test``` set.
 
-Check the [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/sthalles/SimCLR/blob/master/feature_eval/linear_feature_eval.ipynb) notebook for reproducibility.
+Check the [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/sthalles/SimCLR/blob/simclr-refactor/feature_eval/mini_batch_logistic_regression_evaluator.ipynb) notebook for reproducibility.
 
 
-| Linear Classifier | Feature Extractor | Architecture | Feature dimensionality | Projection Head dimensionality | Epochs | STL10 Top 1 |
-|:---------------------------:|:-----------------:|:------------:|:----------------------:|:-------------------------------:|:------:|:-----------:|
-| Logistic Regression | PCA Features | - | 256 | - | | 36.0% |
-| KNN | PCA Features | - | 256 | - | | 31.8% |
-| Logistic Regression (LBFGS) | SimCLR | [ResNet-18](https://drive.google.com/open?id=1c4eVon0sUd-ChVhH6XMpF6nCngNJsAPk) | 512 | 256 | 40 | 70.3% |
-| KNN | SimCLR | ResNet-18 | 512 | 256 | 40 | 66.2% |
-| Logistic Regression (LBFGS) | SimCLR | [ResNet-18](https://drive.google.com/open?id=1L0yoeY9i2mzDcj69P4slTWb-cfr3PyoT) | 512 | 256 | 80 | 72.9% |
-| KNN | SimCLR | ResNet-18 | 512 | 256 | 80 | 69.8% |
-| Logistic Regression (Adam) | SimCLR | [ResNet-18](https://drive.google.com/open?id=1aZ12TITXnajZ6QWmS_SDm8Sp8gXNbeCQ) | 512 | 256 | 100 | 75.4% |
-| Logistic Regression (Adam) | SimCLR | [ResNet-50](https://drive.google.com/open?id=1TZqBNTFCsO-mxAiR-zJeyupY-J2gA27Q) | 2048 | 128 | 40 | 74.6% |
-| Logistic Regression (Adam) | SimCLR | [ResNet-50](https://drive.google.com/open?id=1is1wkBRccHdhSKQnPUTQoaFkVNSaCb35) | 2048 | 128 | 80 | 77.3% |
+| Linear Classification      | Dataset | Feature Extractor | Architecture                                                                     | Feature dimensionality | Projection Head dimensionality | Epochs | Top 1 (%) |
+|----------------------------|---------|-------------------|----------------------------------------------------------------------------------|------------------------|--------------------------------|--------|-----------|
+| Logistic Regression (Adam) | STL10   | SimCLR            | [ResNet-18](https://drive.google.com/open?id=14_nH2FkyKbt61cieQDiSbBVNP8-gtwgF)  | 512                    | 128                            | 100    | 70.45     |
+| Logistic Regression (Adam) | CIFAR10 | SimCLR            | [ResNet-18](https://drive.google.com/open?id=1lc2aoVtrAetGn0PnTkOyFzPCIucOJq7C)  | 512                    | 128                            | 100    | 64.82     |
+| Logistic Regression (Adam) | STL10   | SimCLR            | [ResNet-50](https://drive.google.com/open?id=1ByTKAUsdm_X7tLcii6oAEl5qFRqRMZSu)  | 2048                   | 128                            | 50     | 67.075    |
\ No newline at end of file
diff --git a/run.py b/run.py
index 75cdcf9..5b391ea 100644
--- a/run.py
+++ b/run.py
@@ -1,10 +1,10 @@
 import argparse
 import torch
+import torch.backends.cudnn as cudnn
 from torchvision import models
 from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset
 from models.resnet_simclr import ResNetSimCLR
 from simclr import SimCLR
-import torch.backends.cudnn as cudnn
 
 model_names = sorted(name for name in models.__dict__
                      if name.islower() and not name.startswith("__")
@@ -34,8 +34,6 @@ parser.add_argument('--lr', '--learning-rate', default=0.0003, type=float,
 parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                     metavar='W', help='weight decay (default: 1e-4)',
                     dest='weight_decay')
-parser.add_argument('--resume', default='', type=str, metavar='PATH',
-                    help='path to latest checkpoint (default: none)')
 parser.add_argument('--seed', default=None, type=int,
                     help='seed for initializing training. ')
 parser.add_argument('--disable-cuda', action='store_true',
diff --git a/simclr.py b/simclr.py
index a41bc88..3cd18b6 100644
--- a/simclr.py
+++ b/simclr.py
@@ -1,11 +1,12 @@
+import logging
 import os
 import shutil
 import sys
-import yaml
+
 import torch
-from torch.utils.tensorboard import SummaryWriter
 import torch.nn.functional as F
-import logging
+import yaml
+from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 
 torch.manual_seed(0)
@@ -84,11 +85,8 @@ class SimCLR(object):
             self.model, self.optimizer = amp.initialize(self.model, self.optimizer,
                                                         opt_level='O2',
                                                         keep_batchnorm_fp32=True)
-
-        model_checkpoints_folder = os.path.join(self.writer.log_dir, 'checkpoints')
-
         # save config file
-        _save_config_file(model_checkpoints_folder, self.args)
+        _save_config_file(self.writer.log_dir, self.args)
 
         n_iter = 0
         logging.info(f"Start SimCLR training for {self.args.epochs} epochs.")
@@ -114,7 +112,7 @@ class SimCLR(object):
                 self.optimizer.step()
 
                 if n_iter % self.args.log_every_n_steps == 0:
-                    top1, top5 = accuracy(logits, labels, topk=(1,5))
+                    top1, top5 = accuracy(logits, labels, topk=(1, 5))
                     self.writer.add_scalar('loss', loss, global_step=n_iter)
                     self.writer.add_scalar('acc/top1', top1[0], global_step=n_iter)
                     self.writer.add_scalar('acc/top5', top5[0], global_step=n_iter)
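
For context on the refactor: the commented ```config.yaml``` is replaced by argparse flags, so every former YAML key becomes a command-line option with a default. Below is a minimal, runnable sketch of that pattern. The flag names are the ones visible in this patch; the defaults and the ```choices``` list are illustrative assumptions, not necessarily the repo's exact values.

```python
import argparse

import torch

parser = argparse.ArgumentParser(description='PyTorch SimCLR')
# Flags below appear in the README command or in run.py in this patch;
# defaults and choices are illustrative assumptions.
parser.add_argument('-data', metavar='DIR', default='./datasets',
                    help='path to dataset')
parser.add_argument('--dataset-name', default='stl10',
                    choices=['stl10', 'cifar10'], help='dataset name')
parser.add_argument('--epochs', default=100, type=int,
                    help='number of total epochs to run')
parser.add_argument('--log-every-n-steps', default=100, type=int,
                    help='how often to write TensorBoard scalars')
parser.add_argument('--disable-cuda', action='store_true',
                    help='disable CUDA and run on the CPU, e.g. for debugging')

args = parser.parse_args()
# CPU fallback implied by the --disable-cuda option.
args.device = torch.device('cuda' if not args.disable_cuda and torch.cuda.is_available() else 'cpu')
print(args)
```

One practical upside of this layout: ```python run.py --help``` now lists every option together with its default, playing the documentation role the commented YAML file used to.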
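The Feature Evaluation protocol described in the README can be sketched end to end: freeze the encoder, extract features for the ```STL10 train``` and ```test``` splits, and fit a linear classifier on top. In the sketch below, the ```extract_features``` helper is hypothetical, the backbone is an untrained stand-in (a real run would load one of the ResNet-18/50 checkpoints from the table and drop the projection head), and scikit-learn's ```LogisticRegression``` stands in for the mini-batch Adam training used in the linked notebook.

```python
import torch
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms


def extract_features(encoder, loader, device):
    # Hypothetical helper (not part of this patch): run the frozen
    # encoder over a loader and collect features and labels.
    encoder.eval().to(device)
    feats, labels = [], []
    with torch.no_grad():
        for x, y in loader:
            feats.append(encoder(x.to(device)).cpu())
            labels.append(y)
    return torch.cat(feats).numpy(), torch.cat(labels).numpy()


device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stand-in backbone with the classification head removed, leaving 512-d
# features. A real evaluation would load SimCLR weights here instead.
encoder = models.resnet18()
encoder.fc = torch.nn.Identity()

transform = transforms.ToTensor()
train_set = datasets.STL10('./datasets', split='train', download=True, transform=transform)
test_set = datasets.STL10('./datasets', split='test', download=True, transform=transform)

x_train, y_train = extract_features(encoder, DataLoader(train_set, batch_size=256), device)
x_test, y_test = extract_features(encoder, DataLoader(test_set, batch_size=256), device)

clf = LogisticRegression(max_iter=1000).fit(x_train, y_train)
# With random weights this number sits near chance; with a trained SimCLR
# checkpoint it should approach the Top 1 figures in the table above.
print(f'STL10 top-1: {100.0 * clf.score(x_test, y_test):.2f}%')
```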
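Finally, the last hunk logs contrastive top-1/top-5 accuracy via ```accuracy(logits, labels, topk=(1, 5))```, where the "classes" are the candidate matches scored by the NT-Xent logits. The repo's own helper is defined outside this diff; the sketch below is the standard top-k accuracy computation (in the style of the PyTorch ImageNet example) that a call with this signature implies.

```python
import torch


def accuracy(output, target, topk=(1,)):
    """Compute accuracy over the k top predictions for each k in topk."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the top-k scores per sample, transposed to [maxk, batch].
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # A sample is correct at k if the target is among its top-k predictions.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


# Smoke test: 4 samples scored against 10 candidates.
logits = torch.randn(4, 10)
labels = torch.randint(0, 10, (4,))
top1, top5 = accuracy(logits, labels, topk=(1, 5))
print(top1[0].item(), top5[0].item())
```

Note that ```top1``` and ```top5``` come back as 1-element tensors, which is why the training loop indexes ```top1[0]``` when writing the scalars.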