update 10 NAS algs

2019-11-15 17:15:07 +11:00
parent 672a9ef0db
commit c72e66b66c
139 changed files with 5863 additions and 368 deletions
--- a/others/GDAS/lib/datasets/LanguageDataset.py
+++ b/others/GDAS/lib/datasets/LanguageDataset.py
@@ -0,0 +1,122 @@
+import os
+import torch
+
+from collections import Counter
+
+
+class Dictionary(object):
+  """Two-way mapping between words and integer ids, with occurrence counts."""
+
+  def __init__(self):
+    self.word2idx = {}        # word -> id
+    self.idx2word = []        # id -> word (the list position is the id)
+    self.counter = Counter()  # id -> how many times the word was added
+    self.total = 0            # number of add_word calls so far
+
+  def add_word(self, word):
+    """Record one occurrence of `word` and return its id.
+
+    A word that was not seen before receives the next free id, i.e. the
+    vocabulary size at the time of the call.
+    """
+    token_id = self.word2idx.get(word)
+    if token_id is None:
+      token_id = len(self.idx2word)
+      self.idx2word.append(word)
+      self.word2idx[word] = token_id
+    self.counter[token_id] += 1
+    self.total += 1
+    return token_id
+
+  def __len__(self):
+    """Return the number of distinct words."""
+    return len(self.idx2word)
+
+
+class Corpus(object):
+  """Flat token-id streams for the train/valid/test files found under `path`.
+
+  All three splits share one Dictionary; every line is terminated with the
+  '<eos>' token.
+  """
+
+  def __init__(self, path):
+    self.dictionary = Dictionary()
+    join = os.path.join
+    self.train = self.tokenize(join(path, 'train.txt'))
+    self.valid = self.tokenize(join(path, 'valid.txt'))
+    self.test = self.tokenize(join(path, 'test.txt'))
+
+  def tokenize(self, path):
+    """Read the text file at `path` and return its token ids as one 1-D LongTensor."""
+    assert os.path.exists(path)
+
+    # First pass: extend the vocabulary and count the tokens, so the output
+    # tensor can be allocated with its final size.
+    num_tokens = 0
+    with open(path, 'r', encoding='utf-8') as handle:
+      for line in handle:
+        words = line.split() + ['<eos>']
+        num_tokens += len(words)
+        for word in words:
+          self.dictionary.add_word(word)
+
+    # Second pass: write the id of every token into the tensor, in file order.
+    ids = torch.LongTensor(num_tokens)
+    position = 0
+    with open(path, 'r', encoding='utf-8') as handle:
+      for line in handle:
+        for word in line.split() + ['<eos>']:
+          ids[position] = self.dictionary.word2idx[word]
+          position += 1
+
+    return ids
+
+class SentCorpus(object):
+  """Per-sentence token-id tensors for the train/valid/test files found under `path`.
+
+  Each split is a list with one 1-D LongTensor per non-blank line; every
+  sentence ends with the '<eos>' token. All splits share one Dictionary.
+  """
+
+  def __init__(self, path):
+    self.dictionary = Dictionary()
+    self.train = self.tokenize(os.path.join(path, 'train.txt'))
+    self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
+    self.test = self.tokenize(os.path.join(path, 'test.txt'))
+
+  def tokenize(self, path):
+    """Tokenize the text file at `path` into a list of LongTensors, one per sentence.
+
+    Blank (whitespace-only) lines do not produce a sentence.
+    """
+    assert os.path.exists(path)
+    # Add words to the dictionary
+    with open(path, 'r', encoding='utf-8') as f:
+      for line in f:
+        for word in line.split() + ['<eos>']:
+          self.dictionary.add_word(word)
+
+    # Tokenize file content
+    sents = []
+    with open(path, 'r', encoding='utf-8') as f:
+      for line in f:
+        # A line read from a file still carries its newline, so `not line`
+        # is never true; strip before testing for emptiness.
+        if not line.strip():
+          continue
+        words = line.split() + ['<eos>']
+        sent = torch.LongTensor(len(words))
+        for i, word in enumerate(words):
+          sent[i] = self.dictionary.word2idx[word]
+        sents.append(sent)
+
+    return sents
+
+class BatchSentLoader(object):
+  """Iterate over sentences in length-sorted, padded mini-batches.
+
+  Each batch is a LongTensor of shape (longest sentence in the batch, batch
+  size); shorter sentences are padded at the end with `pad_id`. The last
+  batch may hold fewer than `batch_size` sentences.
+
+  Args:
+    sents: list of 1-D LongTensors.
+    batch_size: maximum number of sentences per batch.
+    pad_id: fill value for padded positions.
+    cuda: when true, every batch is moved to the GPU with `.cuda()`.
+    volatile: stored on the instance; not used by this class.
+  """
+
+  def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False):
+    self.sents = sents
+    self.batch_size = batch_size
+    self.sort_sents = sorted(sents, key=lambda x: x.size(0))
+    self.cuda = cuda
+    self.volatile = volatile
+    self.pad_id = pad_id
+    # Cursor into sort_sents. Initialised here so that next() also works when
+    # it is called before iter(); __iter__ rewinds it.
+    self.idx = 0
+
+  def __next__(self):
+    """Return the next padded batch or raise StopIteration when exhausted."""
+    if self.idx >= len(self.sort_sents):
+      raise StopIteration
+
+    batch_size = min(self.batch_size, len(self.sort_sents)-self.idx)
+    batch = self.sort_sents[self.idx:self.idx+batch_size]
+    max_len = max(s.size(0) for s in batch)
+    tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id)
+    for i, s in enumerate(batch):
+      # sentence i occupies the top of column i; the rest keeps pad_id
+      tensor[:s.size(0),i].copy_(s)
+    if self.cuda:
+      tensor = tensor.cuda()
+
+    self.idx += batch_size
+
+    return tensor
+
+  # Python 2 spelling of the iterator protocol
+  next = __next__
+
+  def __iter__(self):
+    """Rewind to the first batch and return the loader itself."""
+    self.idx = 0
+    return self
--- a/others/GDAS/lib/datasets/MetaBatchSampler.py
+++ b/others/GDAS/lib/datasets/MetaBatchSampler.py
@@ -0,0 +1,65 @@
+# coding=utf-8
+import numpy as np
+import torch
+
+
+class MetaBatchSampler(object):
+  """Episodic batch sampler.
+
+  Every yielded batch is a shuffled LongTensor holding `num_samples` dataset
+  indices for each of `classes_per_it` randomly chosen classes.
+  """
+
+  def __init__(self, labels, classes_per_it, num_samples, iterations):
+    '''
+    Initialize MetaBatchSampler
+    Args:
+    - labels: an iterable containing all the labels for the current dataset
+    samples indexes will be inferred from this iterable. It must provide
+    `.copy()`, and the label values must be exactly 0 .. num_classes-1.
+    - classes_per_it: number of random classes for each iteration
+    - num_samples: number of samples for each iteration for each class (support + query)
+    - iterations: number of iterations (episodes) per epoch
+    '''
+    super(MetaBatchSampler, self).__init__()
+    self.labels           = labels.copy()
+    self.classes_per_it   = classes_per_it
+    self.sample_per_class = num_samples
+    self.iterations       = iterations
+
+    self.classes, self.counts = np.unique(self.labels, return_counts=True)
+    assert len(self.classes) == np.max(self.classes) + 1 and np.min(self.classes) == 0
+    # sampling every class at once is valid, hence <= rather than <
+    assert classes_per_it <= len(self.classes), '{:} vs. {:}'.format(classes_per_it, len(self.classes))
+    self.classes = torch.LongTensor(self.classes)
+
+    # self.indexes maps every class id to a LongTensor with the dataset
+    # indices of the samples that carry this label.
+    indexes = { x.item() : [] for x in self.classes }
+    for idx, label in enumerate(self.labels):
+      # int() accepts numpy scalars, 0-d tensors and plain Python ints alike
+      indexes[ int(label) ].append( idx )
+    self.indexes = { key : torch.LongTensor( value ) for key, value in indexes.items() }
+
+
+  def __iter__(self):
+    # yield a batch of indexes
+    spc = self.sample_per_class
+    cpi = self.classes_per_it
+
+    for it in range(self.iterations):
+      batch_size = spc * cpi
+      batch = torch.LongTensor(batch_size)
+      assert cpi <= len(self.classes), '{:} vs. {:}'.format(cpi, len(self.classes))
+      # pick cpi distinct classes for this episode
+      c_idxs = torch.randperm(len(self.classes))[:cpi]
+
+      for i, cls in enumerate(self.classes[c_idxs]):
+        s = slice(i * spc, (i + 1) * spc)
+        num = self.indexes[ cls.item() ].nelement()
+        # a class with exactly spc samples can still fill its slot
+        assert spc <= num, '{:} vs. {:}'.format(spc, num)
+        # pick spc distinct samples of this class
+        sample_idxs = torch.randperm( num )[:spc]
+        batch[s] = self.indexes[ cls.item() ][sample_idxs]
+
+      # mix the classes inside the batch
+      batch = batch[torch.randperm(len(batch))]
+      yield batch
+
+  def __len__(self):
+    # returns the number of iterations (episodes) per epoch
+    return self.iterations
--- a/others/GDAS/lib/datasets/TieredImageNet.py
+++ b/others/GDAS/lib/datasets/TieredImageNet.py
@@ -0,0 +1,84 @@
+from __future__ import print_function
+import numpy as np
+from PIL import Image
+import pickle as pkl
+import os, cv2, csv, glob
+import torch
+import torch.utils.data as data
+
+
+class TieredImageNet(data.Dataset):
+  """Images and labels loaded from pre-packed files under `root_dir`.
+
+  `split` is one split name or several joined by '-' (e.g. 'val-test'). For
+  every split, `<root_dir>/<split>_labels.pkl` and `<root_dir>/<split>_images.npz`
+  are read; a missing npz is first rebuilt from `<split>_images_png.pkl`.
+  Labels of each later split are shifted past the largest label of the
+  splits before it, so the splits never share a class id.
+
+  Args:
+    root_dir: directory that holds the label / image files.
+    split: split name(s), '-' separated.
+    transform: optional callable applied to the PIL image in __getitem__.
+  """
+
+  def __init__(self, root_dir, split, transform=None):
+    self.split = split
+    self.root_dir = root_dir
+    self.transform = transform
+
+    images, labels, last = [], [], 0
+    for part in split.split('-'):
+      labels_name = '{:}/{:}_labels.pkl'.format(self.root_dir, part)
+      images_name = '{:}/{:}_images.npz'.format(self.root_dir, part)
+      # decompress the images if the npz does not exist yet
+      if not os.path.exists(images_name):
+        png_pkl = images_name[:-4] + '_png.pkl'
+        if os.path.exists(png_pkl):
+          decompress(images_name, png_pkl)
+        else:
+          raise ValueError('png_pkl {:} not exits'.format( png_pkl ))
+      assert os.path.exists(images_name) and os.path.exists(labels_name), '{:} & {:}'.format(images_name, labels_name)
+      print ("Prepare {:} done".format(images_name))
+      # NOTE: unpickling can execute arbitrary code; only load label files
+      # that come from a trusted source.
+      try:
+        with open(labels_name) as f:
+          label_specific = pkl.load(f)["label_specific"]
+      except Exception:
+        # The text-mode attempt failed; reopen in binary mode and read the
+        # byte-string key. `except Exception` (instead of a bare except)
+        # lets KeyboardInterrupt / SystemExit propagate.
+        with open(labels_name, 'rb') as f:
+          label_specific = pkl.load(f, encoding='bytes')[b'label_specific']
+      with np.load(images_name, mmap_mode="r", encoding='latin1') as archive:
+        image_data = archive["images"]
+      images.append( image_data )
+      # shift the labels so that they start right after the previous split
+      label_specific = label_specific + last
+      labels.append( label_specific )
+      last = np.max(label_specific) + 1
+      print ("Load {:} done, with image shape = {:}, label shape = {:}, [{:} ~ {:}]".format(images_name, image_data.shape, label_specific.shape, np.min(label_specific), np.max(label_specific)))
+    images, labels = np.concatenate(images), np.concatenate(labels)
+
+    self.images = images
+    self.labels = labels
+    self.n_classes = int( np.max(labels) + 1 )
+    # class id -> array with the indices of all samples of that class
+    self.dict_index_label = {}
+    for cls in range(self.n_classes):
+      self.dict_index_label[cls] = np.where(labels==cls)[0]
+    self.length = len(labels)
+    print ("There are {:} images, {:} labels [{:} ~ {:}]".format(images.shape, labels.shape, np.min(labels), np.max(labels)))
+
+
+  def __repr__(self):
+    return ('{name}(length={length}, classes={n_classes})'.format(name=self.__class__.__name__, **self.__dict__))
+
+  def __len__(self):
+    return self.length
+
+  def __getitem__(self, index):
+    """Return (image, label) for `index`; the image is passed through `transform` when one is set."""
+    assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
+    image = self.images[index].copy()
+    label = int(self.labels[index])
+    # reverse the channel axis before building the RGB image
+    # (presumably the stored arrays are BGR as decoded by cv2 -- confirm)
+    image = Image.fromarray(image[:,:,::-1].astype('uint8'), 'RGB')
+    if self.transform is not None:
+      image = self.transform( image )
+    return image, label
+
+
+
+
+def decompress(path, output):
+  """Decode the pickled, encoded images in `output` and save them to `path`.
+
+  `output` is read with pickle and every item is decoded with cv2.imdecode
+  (flag 1) into one slot of a uint8 array of shape (N, 84, 84, 3). The array
+  is written with np.savez under the key 'images'.
+  """
+  with open(output, 'rb') as handle:
+    encoded = pkl.load(handle, encoding='bytes')
+  decoded = np.zeros([len(encoded), 84, 84, 3], dtype=np.uint8)
+  for position, buffer in enumerate(encoded):
+    decoded[position] = cv2.imdecode(buffer, 1)
+  np.savez(path, images=decoded)
--- a/others/GDAS/lib/datasets/init.py
+++ b/others/GDAS/lib/datasets/init.py
@@ -0,0 +1,7 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .MetaBatchSampler import MetaBatchSampler
+from .TieredImageNet import TieredImageNet
+from .LanguageDataset import Corpus
+from .get_dataset_with_transform import get_datasets
--- a/others/GDAS/lib/datasets/get_dataset_with_transform.py
+++ b/others/GDAS/lib/datasets/get_dataset_with_transform.py
@@ -0,0 +1,77 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, torch
+import os.path as osp
+import torchvision.datasets as dset
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+
+from utils import Cutout
+from .TieredImageNet import TieredImageNet
+
+
+# Dataset name -> number of classes; get_datasets returns this value as class_num.
+# NOTE(review): 'tiered' maps to -1, presumably a placeholder -- confirm.
+Dataset2Class = {'cifar10' : 10,
+                 'cifar100': 100,
+                 'tiered'  : -1,
+                 'imagenet-1k' : 1000,
+                 'imagenet-100': 100}
+
+
+def get_datasets(name, root, cutout):
+  """Build the train and test datasets for `name`.
+
+  Args:
+    name: dataset name, one of the keys of Dataset2Class.
+    root: dataset root; the ImageNet variants read `root/train` and `root/val`.
+    cutout: argument for Cutout; the transform is appended to the training
+      pipeline of the CIFAR and tiered settings only when it is > 0.
+  Returns:
+    (train_data, test_data, class_num)
+  Raises:
+    TypeError: if `name` is not supported.
+
+  NOTE(review): 'tiered' receives transforms but there is no branch that builds
+  its datasets, so it currently ends in the final TypeError as well.
+  """
+  is_cifar    = name in ('cifar10', 'cifar100')
+  is_imagenet = name in ('imagenet-1k', 'imagenet-100')
+
+  # Per-channel mean / std used for normalisation
+  if name == 'cifar10':
+    mean, std = [v / 255 for v in [125.3, 123.0, 113.9]], [v / 255 for v in [63.0, 62.1, 66.7]]
+  elif name == 'cifar100':
+    mean, std = [v / 255 for v in [129.3, 124.1, 112.4]], [v / 255 for v in [68.2, 65.4, 70.4]]
+  elif name == 'tiered' or is_imagenet:
+    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+  else:
+    raise TypeError("Unknow dataset : {:}".format(name))
+
+  # Data augmentation
+  if is_cifar or name == 'tiered':
+    crop = 32 if is_cifar else 80
+    steps = [transforms.RandomHorizontalFlip(),
+             transforms.RandomCrop(crop, padding=4),
+             transforms.ToTensor(),
+             transforms.Normalize(mean, std)]
+    if cutout > 0:
+      steps.append( Cutout(cutout) )
+    train_transform = transforms.Compose(steps)
+    if is_cifar:
+      test_steps = [transforms.ToTensor(), transforms.Normalize(mean, std)]
+    else:
+      test_steps = [transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)]
+    test_transform = transforms.Compose(test_steps)
+  elif is_imagenet:
+    normalize = transforms.Normalize(mean=mean, std=std)
+    jitter = transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2)
+    train_transform = transforms.Compose([
+      transforms.RandomResizedCrop(224),
+      transforms.RandomHorizontalFlip(),
+      jitter,
+      transforms.ToTensor(),
+      normalize,
+    ])
+    test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
+  else:
+    raise TypeError("Unknow dataset : {:}".format(name))
+
+  # Dataset objects
+  if is_cifar:
+    builder = dset.CIFAR10 if name == 'cifar10' else dset.CIFAR100
+    train_data = builder(root, train=True , transform=train_transform, download=True)
+    test_data  = builder(root, train=False, transform=test_transform , download=True)
+  elif is_imagenet:
+    train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
+    test_data  = dset.ImageFolder(osp.join(root, 'val'),   test_transform)
+  else:
+    raise TypeError("Unknow dataset : {:}".format(name))
+
+  return train_data, test_data, Dataset2Class[name]
--- a/others/GDAS/lib/datasets/test_NLP.py
+++ b/others/GDAS/lib/datasets/test_NLP.py
@@ -0,0 +1,10 @@
+import os, sys, torch
+
+from LanguageDataset import SentCorpus, BatchSentLoader
+
+if __name__ == '__main__':
+  # Smoke test: batch the test split of the corpus ten sentences at a time
+  # and print the size of every padded batch.
+  corpus = SentCorpus( '../../data/data/penn' )
+  for index, batch in enumerate(BatchSentLoader(corpus.test, 10)):
+    print('{:} :: {:}'.format(index, batch.size()))
--- a/others/GDAS/lib/datasets/test_dataset.py
+++ b/others/GDAS/lib/datasets/test_dataset.py
@@ -0,0 +1,33 @@
+import os, sys, torch
+import torchvision.transforms as transforms
+
+from TieredImageNet import TieredImageNet
+from MetaBatchSampler import MetaBatchSampler
+
+# Manual smoke test for TieredImageNet + MetaBatchSampler.
+# The data is read from $TORCH_HOME/tiered-imagenet (TORCH_HOME must be set).
+root_dir = os.environ['TORCH_HOME'] + '/tiered-imagenet'
+print ('root : {:}'.format(root_dir))
+means, stds = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+
+# training-style augmentation followed by normalisation
+lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(84, padding=8), transforms.ToTensor(), transforms.Normalize(means, stds)]
+transform = transforms.Compose(lists)
+
+# load the 'val' and 'test' splits merged into one dataset and inspect one sample
+dataset = TieredImageNet(root_dir, 'val-test', transform)
+image, label = dataset[111]
+print ('image shape = {:}, label = {:}'.format(image.size(), label))
+print ('image : min = {:}, max = {:}    ||| label : {:}'.format(image.min(), image.max(), label))
+
+
+# 10 episodes per epoch, each with 250 classes x 100 samples per class
+sampler = MetaBatchSampler(dataset.labels, 250, 100, 10)
+
+dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)
+
+print ('the length of dataset : {:}'.format( len(dataset) ))
+print ('the length of loader  : {:}'.format( len(dataloader) ))
+
+# print the per-channel statistics of every episode
+for images, labels in dataloader:
+  print ('images : {:}'.format( images.size() ))
+  print ('labels : {:}'.format( labels.size() ))
+  for i in range(3):
+    print ('image-value-[{:}] : {:} ~ {:}, mean={:}, std={:}'.format(i, images[:,i].min(), images[:,i].max(), images[:,i].mean(), images[:,i].std()))
+
+print('-----')
--- a/others/GDAS/lib/nas/CifarNet.py
+++ b/others/GDAS/lib/nas/CifarNet.py
@@ -0,0 +1,89 @@
+import torch
+import torch.nn as nn
+from .construct_utils import Cell, Transition
+
+class AuxiliaryHeadCIFAR(nn.Module):
+  """Auxiliary classifier head: pooled conv features followed by a linear layer."""
+
+  def __init__(self, C, num_classes):
+    """Build the head for `C` input channels; assumes an 8x8 input feature map."""
+    super(AuxiliaryHeadCIFAR, self).__init__()
+    layers = [
+      nn.ReLU(inplace=True),
+      # 5x5 average pooling with stride 3: 8x8 -> 2x2
+      nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
+      nn.Conv2d(C, 128, 1, bias=False),
+      nn.BatchNorm2d(128),
+      nn.ReLU(inplace=True),
+      # 2x2 convolution: 2x2 -> 1x1, so the flattened feature has 768 entries
+      nn.Conv2d(128, 768, 2, bias=False),
+      nn.BatchNorm2d(768),
+      nn.ReLU(inplace=True),
+    ]
+    self.features = nn.Sequential(*layers)
+    self.classifier = nn.Linear(768, num_classes)
+
+  def forward(self, x):
+    """Return the auxiliary logits for the feature map `x`."""
+    feats = self.features(x)
+    flat = feats.view(feats.size(0), -1)
+    return self.classifier(flat)
+
+
+class NetworkCIFAR(nn.Module):
+  """Network built by stacking `layers` cells described by `genotype`.
+
+  The cells at positions layers//3 and 2*layers//3 are reduction cells and
+  double the channel count. When `genotype.reduce` is None a Transition
+  module is used in place of a reduction Cell.
+
+  Args:
+    C: base channel count; the stem produces 3*C channels.
+    num_classes: size of the classifier output.
+    layers: number of cells.
+    auxiliary: whether to attach an auxiliary head after cell 2*layers//3.
+    genotype: cell description passed to Cell.
+  """
+
+  def __init__(self, C, num_classes, layers, auxiliary, genotype):
+    super(NetworkCIFAR, self).__init__()
+    self._layers = layers
+
+    stem_multiplier = 3
+    C_curr = stem_multiplier*C
+    self.stem = nn.Sequential(
+      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
+      nn.BatchNorm2d(C_curr)
+    )
+
+    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
+    self.cells = nn.ModuleList()
+    reduction_prev = False
+    for i in range(layers):
+      if i in [layers//3, 2*layers//3]:
+        C_curr *= 2
+        reduction = True
+      else:
+        reduction = False
+      if reduction and genotype.reduce is None:
+        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
+      else:
+        cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+      reduction_prev = reduction
+      self.cells.append( cell )
+      # a cell outputs the concatenation of `multiplier` states of C_curr channels
+      C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
+      if i == 2*layers//3:
+        C_to_auxiliary = C_prev
+
+    if auxiliary:
+      self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
+    else:
+      self.auxiliary_head = None
+    self.global_pooling = nn.AdaptiveAvgPool2d(1)
+    self.classifier = nn.Linear(C_prev, num_classes)
+    # -1 disables drop-path: the cells only apply it when the value is > 0
+    self.drop_path_prob = -1
+
+  def update_drop_path(self, drop_path_prob):
+    """Set the drop-path probability passed to every cell in forward()."""
+    self.drop_path_prob = drop_path_prob
+
+  def get_drop_path(self):
+    """Return the current drop-path probability (same accessor as NetworkImageNet)."""
+    return self.drop_path_prob
+
+  def auxiliary_param(self):
+    """Return the parameters of the auxiliary head as a list ([] without a head)."""
+    if self.auxiliary_head is None: return []
+    else: return list( self.auxiliary_head.parameters() )
+
+  def forward(self, inputs):
+    """Return the logits; in training mode with an auxiliary head, (logits, logits_aux)."""
+    use_auxiliary = self.auxiliary_head is not None and self.training
+    s0 = s1 = self.stem(inputs)
+    for i, cell in enumerate(self.cells):
+      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+      if i == 2*self._layers//3 and use_auxiliary:
+        logits_aux = self.auxiliary_head(s1)
+    out = self.global_pooling(s1)
+    out = out.view(out.size(0), -1)
+    logits = self.classifier(out)
+
+    if use_auxiliary:
+      return logits, logits_aux
+    else:
+      return logits
--- a/others/GDAS/lib/nas/ImageNet.py
+++ b/others/GDAS/lib/nas/ImageNet.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+from .construct_utils import Cell, Transition
+
+class AuxiliaryHeadImageNet(nn.Module):
+  """Auxiliary classifier head: pooled conv features followed by a linear layer."""
+
+  def __init__(self, C, num_classes):
+    """Build the head for `C` input channels.
+
+    NOTE(review): the original docstring said "assuming input size 14x14", but
+    the 768-feature classifier only fits when the feature extractor ends at
+    1x1 (e.g. a 7x7 input) -- confirm the intended input size.
+    """
+    super(AuxiliaryHeadImageNet, self).__init__()
+    layers = [
+      nn.ReLU(inplace=True),
+      nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
+      nn.Conv2d(C, 128, 1, bias=False),
+      nn.BatchNorm2d(128),
+      nn.ReLU(inplace=True),
+      nn.Conv2d(128, 768, 2, bias=False),
+      # NOTE: no BatchNorm2d(768) here. It was omitted in an earlier
+      # implementation due to a typo and stays out for consistency with the
+      # experiments in the paper.
+      nn.ReLU(inplace=True),
+    ]
+    self.features = nn.Sequential(*layers)
+    self.classifier = nn.Linear(768, num_classes)
+
+  def forward(self, x):
+    """Return the auxiliary logits for the feature map `x`."""
+    feats = self.features(x)
+    flat = feats.view(feats.size(0), -1)
+    return self.classifier(flat)
+
+
+
+
+class NetworkImageNet(nn.Module):
+  """Network with two strided stems followed by `layers` cells described by `genotype`.
+
+  The cells at positions layers//3 and 2*layers//3 are reduction cells and
+  double the channel count. When `genotype.reduce` is None a Transition
+  module is used in place of a reduction Cell.
+  """
+
+  def __init__(self, C, num_classes, layers, auxiliary, genotype):
+    super(NetworkImageNet, self).__init__()
+    self._layers = layers
+    half = C // 2
+
+    # stem0: two stride-2 convolutions
+    stem0_layers = [
+      nn.Conv2d(3, half, kernel_size=3, stride=2, padding=1, bias=False),
+      nn.BatchNorm2d(half),
+      nn.ReLU(inplace=True),
+      nn.Conv2d(half, C, 3, stride=2, padding=1, bias=False),
+      nn.BatchNorm2d(C),
+    ]
+    self.stem0 = nn.Sequential(*stem0_layers)
+
+    # stem1: one more stride-2 convolution
+    stem1_layers = [
+      nn.ReLU(inplace=True),
+      nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
+      nn.BatchNorm2d(C),
+    ]
+    self.stem1 = nn.Sequential(*stem1_layers)
+
+    reduce_at = (layers // 3, 2 * layers // 3)
+    C_prev_prev, C_prev, C_curr = C, C, C
+
+    self.cells = nn.ModuleList()
+    # stem1 halves the resolution of stem0's output, so the first cell
+    # treats its older input like the output of a reduction cell
+    reduction_prev = True
+    for i in range(layers):
+      reduction = i in reduce_at
+      if reduction:
+        C_curr *= 2
+      if reduction and genotype.reduce is None:
+        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev)
+      else:
+        cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
+      reduction_prev = reduction
+      self.cells.append(cell)
+      C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
+      if i == reduce_at[1]:
+        C_to_auxiliary = C_prev
+
+    if auxiliary:
+      self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
+    else:
+      self.auxiliary_head = None
+    self.global_pooling = nn.AvgPool2d(7)
+    self.classifier = nn.Linear(C_prev, num_classes)
+    # -1 disables drop-path: the cells only apply it when the value is > 0
+    self.drop_path_prob = -1
+
+  def update_drop_path(self, drop_path_prob):
+    """Set the drop-path probability passed to every cell in forward()."""
+    self.drop_path_prob = drop_path_prob
+
+  def get_drop_path(self):
+    """Return the current drop-path probability."""
+    return self.drop_path_prob
+
+  def auxiliary_param(self):
+    """Return the parameters of the auxiliary head as a list ([] without a head)."""
+    if self.auxiliary_head is None:
+      return []
+    return list( self.auxiliary_head.parameters() )
+
+  def forward(self, input):
+    """Return the logits; in training mode with an auxiliary head, (logits, logits_aux)."""
+    with_aux = self.auxiliary_head is not None and self.training
+    s0 = self.stem0(input)
+    s1 = self.stem1(s0)
+    aux_position = 2 * self._layers // 3
+    for i, cell in enumerate(self.cells):
+      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
+      if i == aux_position and with_aux:
+        logits_aux = self.auxiliary_head(s1)
+    pooled = self.global_pooling(s1)
+    logits = self.classifier(pooled.view(pooled.size(0), -1))
+    if with_aux:
+      return logits, logits_aux
+    return logits
--- a/others/GDAS/lib/nas/SE_Module.py
+++ b/others/GDAS/lib/nas/SE_Module.py
@@ -0,0 +1,27 @@
+import torch
+import torch.nn as nn
+# Squeeze and Excitation module
+
+class SqEx(nn.Module):
+  """Squeeze-and-Excitation block: rescales every channel of a 4-D input by a learned gate.
+
+  Args:
+    n_features: number of channels of the input.
+    reduction: bottleneck ratio; the hidden layer has n_features // reduction units.
+  Raises:
+    ValueError: if n_features is not divisible by reduction.
+  """
+
+  def __init__(self, n_features, reduction=16):
+    super(SqEx, self).__init__()
+
+    if n_features % reduction != 0:
+      raise ValueError('n_features must be divisible by reduction (default = 16)')
+
+    self.linear1 = nn.Linear(n_features, n_features // reduction, bias=True)
+    self.nonlin1 = nn.ReLU(inplace=True)
+    self.linear2 = nn.Linear(n_features // reduction, n_features, bias=True)
+    self.nonlin2 = nn.Sigmoid()
+
+  def forward(self, x):
+    """Return `x` multiplied channel-wise by its sigmoid gate (same shape as `x`)."""
+    # Squeeze: average over the full spatial extent. `nn.functional` is used
+    # because this module only imports torch and torch.nn; the former bare
+    # `F` was never defined and raised NameError.
+    y = nn.functional.avg_pool2d(x, kernel_size=x.size()[2:4])
+    # move channels last so that the linear layers act on the channel axis
+    y = y.permute(0, 2, 3, 1)
+    y = self.nonlin1(self.linear1(y))
+    y = self.nonlin2(self.linear2(y))
+    # back to channels-first so the gate broadcasts over the spatial axes
+    y = y.permute(0, 3, 1, 2)
+    y = x * y
+    return y
+
--- a/others/GDAS/lib/nas/init.py
+++ b/others/GDAS/lib/nas/init.py
@@ -0,0 +1,10 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .CifarNet        import NetworkCIFAR
+from .ImageNet        import NetworkImageNet
+
+# genotypes
+from .genotypes       import model_types
+
+from .construct_utils import return_alphas_str
--- a/others/GDAS/lib/nas/construct_utils.py
+++ b/others/GDAS/lib/nas/construct_utils.py
@@ -0,0 +1,152 @@
+import random
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .operations import OPS, FactorizedReduce, ReLUConvBN, Identity
+
+
+def random_select(length, ratio):
+  """Return a 0/1 list of `length` entries with at least one entry set.
+
+  One uniformly drawn position is always 1; every other position is 1 when a
+  fresh random.random() draw is below `ratio`.
+  """
+  forced = random.randint(0, length-1)
+  # `or` short-circuits, so no random number is consumed for the forced position
+  return [1 if (pos == forced or random.random() < ratio) else 0 for pos in range(length)]
+
+
+def all_select(length):
+  """Return a list of `length` ones."""
+  return [1] * length
+
+
+def drop_path(x, drop_prob):
+  """Zero whole samples of `x` with probability `drop_prob`, rescaling the rest.
+
+  `x` is modified in place and returned. One Bernoulli(1 - drop_prob) value
+  is drawn per entry of the first axis and broadcast over three further
+  axes. When `drop_prob` is not > 0, `x` is returned untouched.
+  """
+  if not drop_prob > 0.:
+    return x
+  keep_prob = 1. - drop_prob
+  mask = x.new_zeros(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
+  # divide by keep_prob first, then apply the mask
+  x.div_(keep_prob)
+  x.mul_(mask)
+  return x
+
+
+def return_alphas_str(basemodel):
+  """Format the softmax (over the last axis) of the model's architecture weights.
+
+  `alphas_normal` is always reported; `alphas_reduce` is added on a second
+  line when the model has that attribute.
+  """
+  lines = ['normal : {:}'.format( F.softmax(basemodel.alphas_normal, dim=-1) )]
+  if hasattr(basemodel, 'alphas_reduce'):
+    lines.append( 'reduce : {:}'.format( F.softmax(basemodel.alphas_reduce, dim=-1) ) )
+  return '\n'.join(lines)
+
+
+class Cell(nn.Module):
+  """A cell assembled from a genotype.
+
+  The genotype lists (op_name, input_index, value) triples; two consecutive
+  triples form one step whose two op outputs are summed into a new state.
+  The cell output is the channel-wise concatenation of the states listed in
+  the genotype's concat field.
+  """
+
+  def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
+    super(Cell, self).__init__()
+    print(C_prev_prev, C_prev, C)
+
+    # the older input goes through FactorizedReduce when the previous cell
+    # was a reduction cell, otherwise through a 1x1 ReLUConvBN
+    self.preprocess0 = FactorizedReduce(C_prev_prev, C) if reduction_prev else ReLUConvBN(C_prev_prev, C, 1, 1, 0)
+    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
+
+    if reduction:
+      edges, concat = genotype.reduce, genotype.reduce_concat
+    else:
+      edges, concat = genotype.normal, genotype.normal_concat
+    op_names, indices, values = zip(*edges)
+    self._compile(C, op_names, indices, values, concat, reduction)
+
+  def _compile(self, C, op_names, indices, values, concat, reduction):
+    """Instantiate one op per genotype entry and remember the wiring."""
+    assert len(op_names) == len(indices)
+    self._steps = len(op_names) // 2
+    self._concat = concat
+    self.multiplier = len(concat)
+
+    self._ops = nn.ModuleList()
+    for name, index in zip(op_names, indices):
+      # in a reduction cell only the ops reading the two cell inputs use stride 2
+      stride = 2 if reduction and index < 2 else 1
+      self._ops.append( OPS[name](C, stride, True) )
+    self._indices = indices
+    self._values  = values
+
+  def forward(self, s0, s1, drop_prob):
+    """Run the cell on the two previous outputs and return the concatenated states."""
+    states = [self.preprocess0(s0), self.preprocess1(s1)]
+    for step in range(self._steps):
+      first, second = 2*step, 2*step+1
+      op1, op2 = self._ops[first], self._ops[second]
+      h1 = op1(states[self._indices[first]])
+      h2 = op2(states[self._indices[second]])
+      if self.training and drop_prob > 0.:
+        # Identity outputs are never dropped
+        if not isinstance(op1, Identity):
+          h1 = drop_path(h1, drop_prob)
+        if not isinstance(op2, Identity):
+          h2 = drop_path(h2, drop_prob)
+      states.append(h1 + h2)
+    return torch.cat([states[i] for i in self._concat], dim=1)
+
+
+
+class Transition(nn.Module):
+  """Fixed reduction module used in place of a searched reduction cell.
+
+  Both preprocessed inputs pass through a strided grouped-convolution branch
+  and through a strided max-pooling branch; the four results are
+  concatenated along the channel axis.
+  """
+
+  def __init__(self, C_prev_prev, C_prev, C, reduction_prev, multiplier=4):
+    super(Transition, self).__init__()
+    if reduction_prev:
+      self.preprocess0 = FactorizedReduce(C_prev_prev, C)
+    else:
+      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
+    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
+    self.multiplier  = multiplier
+
+    self.reduction = True
+
+    def conv_branch():
+      # 1x3 then 3x1 grouped convolutions (stride 2 along one axis each),
+      # followed by a 1x1 convolution
+      return nn.Sequential(
+        nn.ReLU(inplace=False),
+        nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
+        nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
+        nn.BatchNorm2d(C, affine=True),
+        nn.ReLU(inplace=False),
+        nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
+        nn.BatchNorm2d(C, affine=True))
+
+    def pool_branch():
+      # 3x3 max pooling with stride 2, followed by batch norm
+      return nn.Sequential(
+        nn.MaxPool2d(3, stride=2, padding=1),
+        nn.BatchNorm2d(C, affine=True))
+
+    self.ops1 = nn.ModuleList([conv_branch(), conv_branch()])
+    self.ops2 = nn.ModuleList([pool_branch(), pool_branch()])
+
+
+  def forward(self, s0, s1, drop_prob = -1):
+    """Return the channel-wise concatenation of the four branch outputs."""
+    s0 = self.preprocess0(s0)
+    s1 = self.preprocess1(s1)
+    dropping = self.training and drop_prob > 0.
+
+    X0 = self.ops1[0] (s0)
+    X1 = self.ops1[1] (s1)
+    if dropping:
+      X0 = drop_path(X0, drop_prob)
+      X1 = drop_path(X1, drop_prob)
+
+    X2 = self.ops2[0] (s0)
+    X3 = self.ops2[1] (s1)
+    if dropping:
+      X2 = drop_path(X2, drop_prob)
+      X3 = drop_path(X3, drop_prob)
+    return torch.cat([X0, X1, X2, X3], dim=1)
--- a/others/GDAS/lib/nas/genotypes.py
+++ b/others/GDAS/lib/nas/genotypes.py
@@ -0,0 +1,245 @@
+from collections import namedtuple
+
+# Description of a cell pair. `normal` and `reduce` are lists of
+# (op_name, input_index, value) triples; `normal_concat` and `reduce_concat`
+# list the indices of the states that form the cell output.
+Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
+
+# Names of the candidate operations.
+# NOTE(review): some genotypes in this file use op names that are not listed
+# here (e.g. 'sep_conv_7x7', 'conv_7x1_1x7') -- confirm they are defined in OPS.
+PRIMITIVES = [
+    'none',
+    'max_pool_3x3',
+    'avg_pool_3x3',
+    'skip_connect',
+    'sep_conv_3x3',
+    'sep_conv_5x5',
+    'dil_conv_3x3',
+    'dil_conv_5x5'
+]
+
+# NASNet genotype: ten entries (five steps) per cell; the normal cell
+# concatenates states 2-6 and the reduce cell states 4-6.
+# NOTE(review): the reduce cell uses 'sep_conv_7x7', which is not in PRIMITIVES.
+NASNet = Genotype(
+  normal = [
+    ('sep_conv_5x5', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('sep_conv_5x5', 0, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('avg_pool_3x3', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('avg_pool_3x3', 0, 1.0),
+    ('avg_pool_3x3', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('skip_connect', 1, 1.0),
+  ],
+  normal_concat = [2, 3, 4, 5, 6],
+  reduce = [
+    ('sep_conv_5x5', 1, 1.0),
+    ('sep_conv_7x7', 0, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_7x7', 0, 1.0),
+    ('avg_pool_3x3', 1, 1.0),
+    ('sep_conv_5x5', 0, 1.0),
+    ('skip_connect', 3, 1.0),
+    ('avg_pool_3x3', 2, 1.0),
+    ('sep_conv_3x3', 2, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+  ],
+  reduce_concat = [4, 5, 6],
+)
+    
+# AmoebaNet genotype: ten entries (five steps) per cell; the normal cell
+# concatenates states 4-6 and the reduce cell states 3, 4 and 6.
+# NOTE(review): the reduce cell uses 'sep_conv_7x7' and 'conv_7x1_1x7', which
+# are not in PRIMITIVES.
+AmoebaNet = Genotype(
+  normal = [
+    ('avg_pool_3x3', 0, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('sep_conv_5x5', 2, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('avg_pool_3x3', 3, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('skip_connect', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('avg_pool_3x3', 1, 1.0),
+    ],
+  normal_concat = [4, 5, 6],
+  reduce = [
+    ('avg_pool_3x3', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('sep_conv_7x7', 2, 1.0),
+    ('sep_conv_7x7', 0, 1.0),
+    ('avg_pool_3x3', 1, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('conv_7x1_1x7', 0, 1.0),
+    ('sep_conv_3x3', 5, 1.0),
+  ],
+  reduce_concat = [3, 4, 6]
+)
+
+# DARTS_V1 genotype: eight entries (four steps) per cell; both cells
+# concatenate states 2-5.
+DARTS_V1 = Genotype(
+  normal=[
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('skip_connect', 2, 1.0)],
+  normal_concat=[2, 3, 4, 5],
+  reduce=[
+    ('max_pool_3x3', 0, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('avg_pool_3x3', 0, 1.0)],
+  reduce_concat=[2, 3, 4, 5]
+)
+
+# DARTS_V2 genotype: eight entries (four steps) per cell; both cells
+# concatenate states 2-5. The name DARTS is bound to this genotype further down.
+DARTS_V2 = Genotype(
+  normal=[
+    ('sep_conv_3x3', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('dil_conv_3x3', 2, 1.0)],
+  normal_concat=[2, 3, 4, 5],
+  reduce=[
+    ('max_pool_3x3', 0, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('skip_connect', 2, 1.0),
+    ('max_pool_3x3', 1, 1.0)],
+  reduce_concat=[2, 3, 4, 5]
+)
+
+# PNASNet genotype: ten entries (five steps) per cell; the normal and reduce
+# lists are identical and both cells concatenate states 2-6.
+# NOTE(review): uses 'sep_conv_7x7', which is not in PRIMITIVES.
+PNASNet = Genotype(
+  normal = [
+    ('sep_conv_5x5', 0, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('sep_conv_7x7', 1, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_5x5', 1, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 4, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('skip_connect', 1, 1.0),
+  ],
+  normal_concat = [2, 3, 4, 5, 6],
+  reduce = [
+    ('sep_conv_5x5', 0, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('sep_conv_7x7', 1, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_5x5', 1, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 4, 1.0),
+    ('max_pool_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('skip_connect', 1, 1.0),
+  ],
+  reduce_concat = [2, 3, 4, 5, 6],
+)
+
+# ENAS cell description, taken from https://arxiv.org/pdf/1802.03268.pdf
+# Ten (operation name, index, weight) triples per cell type.
+# NOTE(review): the trailing "# 2" .. "# 5" markers in `reduce` presumably
+# number the intermediate node completed by each pair of entries -- confirm.
+ENASNet = Genotype(
+  normal = [
+    ('sep_conv_3x3', 1, 1.0),
+    ('skip_connect', 1, 1.0),
+    ('sep_conv_5x5', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('avg_pool_3x3', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('avg_pool_3x3', 1, 1.0),
+    ('sep_conv_5x5', 1, 1.0),
+    ('avg_pool_3x3', 0, 1.0),
+  ],
+  normal_concat = [2, 3, 4, 5, 6],
+  reduce = [
+    ('sep_conv_5x5', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0), # 2
+    ('sep_conv_3x3', 1, 1.0),
+    ('avg_pool_3x3', 1, 1.0), # 3
+    ('sep_conv_3x3', 1, 1.0),
+    ('avg_pool_3x3', 1, 1.0), # 4
+    ('avg_pool_3x3', 1, 1.0),
+    ('sep_conv_5x5', 4, 1.0), # 5
+    ('sep_conv_3x3', 5, 1.0),
+    ('sep_conv_5x5', 0, 1.0),
+  ],
+  reduce_concat = [2, 3, 4, 5, 6],
+)
+
+# The unqualified name DARTS is an alias of DARTS_V2 (same object).
+DARTS = DARTS_V2
+
+# GDAS_V1: both the normal and the reduction cell come from the search.
+# The third element of each triple is a fractional score rather than 1.0.
+# NOTE(review): the concat fields are `range` objects, not lists as in the
+# hand-written genotypes above -- confirm consumers only iterate / len() them.
+GDAS_V1 = Genotype(
+  normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
+  normal_concat=range(2, 6),
+  reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
+  reduce_concat=range(2, 6)
+)
+
+# GDAS_F1: only the normal cell is searched; the reduction cell is fixed.
+# `reduce` is None here.
+# NOTE(review): the consumer presumably substitutes a fixed reduction cell
+# when `reduce` is None -- confirm against the network builder.
+GDAS_F1 = Genotype(
+  normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
+  normal_concat=[2, 3, 4, 5],
+  reduce=None,
+  reduce_concat=[2, 3, 4, 5],
+)
+
+# Combine GDAS_V1 and GDAS_F1
+# GDAS_GF: the normal cell is the same list as GDAS_V1's, with `reduce` left as None.
+GDAS_GF = Genotype(
+  normal=[('skip_connect', 0, 0.13017432391643524), ('skip_connect', 1, 0.12947972118854523), ('skip_connect', 0, 0.13062666356563568), ('sep_conv_5x5', 2, 0.12980839610099792), ('sep_conv_3x3', 3, 0.12923765182495117), ('skip_connect', 0, 0.12901571393013), ('sep_conv_5x5', 4, 0.12938997149467468), ('sep_conv_3x3', 3, 0.1289220005273819)],
+  normal_concat=range(2, 6),
+  reduce=None,
+  reduce_concat=range(2, 6)
+)
+# GDAS_FG: the normal cell is the same list as GDAS_F1's and the reduction
+# cell is the same list as GDAS_V1's.
+GDAS_FG = Genotype(
+  normal=[('skip_connect', 0, 0.16), ('skip_connect', 1, 0.13), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.17), ('sep_conv_3x3', 2, 0.15), ('skip_connect', 0, 0.16), ('sep_conv_3x3', 2, 0.15)],
+  normal_concat=range(2, 6),
+  reduce=[('sep_conv_5x5', 0, 0.12862831354141235), ('sep_conv_3x3', 1, 0.12783904373645782), ('sep_conv_5x5', 2, 0.12725995481014252), ('sep_conv_5x5', 1, 0.12705285847187042), ('dil_conv_5x5', 2, 0.12797553837299347), ('sep_conv_3x3', 1, 0.12737272679805756), ('sep_conv_5x5', 0, 0.12833961844444275), ('sep_conv_5x5', 1, 0.12758426368236542)],
+  reduce_concat=range(2, 6)
+)
+
+# PDARTS cell description: eight (operation name, index, weight) triples per
+# cell type, all with weight 1.0; the concat fields are range(2, 6).
+PDARTS = Genotype(
+  normal=[
+    ('skip_connect', 0, 1.0),
+    ('dil_conv_3x3', 1, 1.0),
+    ('skip_connect', 0, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 1, 1.0),
+    ('sep_conv_3x3', 3, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('dil_conv_5x5', 4, 1.0)],
+  normal_concat=range(2, 6),
+  reduce=[
+    ('avg_pool_3x3', 0, 1.0),
+    ('sep_conv_5x5', 1, 1.0),
+    ('sep_conv_3x3', 0, 1.0),
+    ('dil_conv_5x5', 2, 1.0),
+    ('max_pool_3x3', 0, 1.0),
+    ('dil_conv_3x3', 1, 1.0),
+    ('dil_conv_3x3', 1, 1.0),
+    ('dil_conv_5x5', 3, 1.0)],
+  reduce_concat=range(2, 6)
+)
+
+
+# Lookup table from architecture name (string) to its Genotype constant.
+# The plain DARTS alias is not registered; use 'DARTS_V1' or 'DARTS_V2'.
+model_types = {'DARTS_V1': DARTS_V1,
+               'DARTS_V2': DARTS_V2,
+               'NASNet'  : NASNet,
+               'PNASNet' : PNASNet, 
+               'AmoebaNet': AmoebaNet,
+               'ENASNet' : ENASNet,
+               'PDARTS'  : PDARTS,
+               'GDAS_V1' : GDAS_V1,
+               'GDAS_F1' : GDAS_F1,
+               'GDAS_GF' : GDAS_GF,
+               'GDAS_FG' : GDAS_FG}
--- a/others/GDAS/lib/nas/head_utils.py
+++ b/others/GDAS/lib/nas/head_utils.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+class ImageNetHEAD(nn.Sequential):
+  """Stem made of conv-bn-relu followed by conv-bn.
+
+  The first 3x3 convolution always uses stride 2 and maps 3 input channels
+  to C // 2 channels; the second maps C // 2 to C channels with the
+  configurable `stride`. Child names are conv1, bn1, relu1, conv2, bn2.
+  """
+
+  def __init__(self, C, stride=2):
+    super().__init__()
+    half = C // 2
+    layers = (
+      ('conv1', nn.Conv2d(3, half, kernel_size=3, stride=2, padding=1, bias=False)),
+      ('bn1', nn.BatchNorm2d(half)),
+      ('relu1', nn.ReLU(inplace=True)),
+      ('conv2', nn.Conv2d(half, C, kernel_size=3, stride=stride, padding=1, bias=False)),
+      ('bn2', nn.BatchNorm2d(C)),
+    )
+    for name, layer in layers:
+      self.add_module(name, layer)
+
+
+class CifarHEAD(nn.Sequential):
+  """Stem made of a single 3x3 convolution (3 -> C channels, no bias) and a BatchNorm."""
+
+  def __init__(self, C):
+    super().__init__()
+    stem = nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False)
+    norm = nn.BatchNorm2d(C)
+    for name, layer in (('conv', stem), ('bn', norm)):
+      self.add_module(name, layer)
--- a/others/GDAS/lib/nas/operations.py
+++ b/others/GDAS/lib/nas/operations.py
@@ -0,0 +1,122 @@
+import torch
+import torch.nn as nn
+
+# Maps a primitive name to a factory `(C, stride, affine) -> nn.Module`.
+# - 'none' builds Zero(stride) and ignores C and affine.
+# - Both pooling entries append BatchNorm2d(C, affine=False); the `affine`
+#   argument is ignored for them.
+# - 'skip_connect' is Identity at stride 1 and FactorizedReduce otherwise.
+# - Every convolutional entry keeps the channel count (C -> C).
+OPS = {
+  'none'         : lambda C, stride, affine: Zero(stride),
+  'avg_pool_3x3' : lambda C, stride, affine: nn.Sequential(
+                                               nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
+                                               nn.BatchNorm2d(C, affine=False) ),
+  'max_pool_3x3' : lambda C, stride, affine: nn.Sequential(
+                                               nn.MaxPool2d(3, stride=stride, padding=1),
+                                               nn.BatchNorm2d(C, affine=False) ),
+  'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
+  'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
+  'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
+  'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
+  'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
+  'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
+  'conv_7x1_1x7' : lambda C, stride, affine: Conv717(C, C, stride, affine),
+}
+
+class Conv717(nn.Module):
+  """ReLU, a (1, 7) convolution, a (7, 1) convolution, then BatchNorm.
+
+  The first convolution strides along the last axis only and the second
+  along the second-to-last axis only, so together they stride by `stride`
+  in both spatial directions.
+  """
+
+  def __init__(self, C_in, C_out, stride, affine):
+    super().__init__()
+    row_conv = nn.Conv2d(C_in , C_out, (1,7), stride=(1, stride), padding=(0, 3), bias=False)
+    col_conv = nn.Conv2d(C_out, C_out, (7,1), stride=(stride, 1), padding=(3, 0), bias=False)
+    norm = nn.BatchNorm2d(C_out, affine=affine)
+    self.op = nn.Sequential(nn.ReLU(inplace=False), row_conv, col_conv, norm)
+
+  def forward(self, x):
+    out = self.op(x)
+    return out
+
+
+class ReLUConvBN(nn.Module):
+  """ReLU followed by a bias-free convolution and BatchNorm."""
+
+  def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
+    super().__init__()
+    conv = nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False)
+    norm = nn.BatchNorm2d(C_out, affine=affine)
+    self.op = nn.Sequential(nn.ReLU(inplace=False), conv, norm)
+
+  def forward(self, x):
+    out = self.op(x)
+    return out
+
+
+class DilConv(nn.Module):
+  """ReLU, dilated depthwise convolution, 1x1 pointwise convolution, BatchNorm."""
+
+  def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
+    super().__init__()
+    # groups=C_in makes the first convolution depthwise
+    depthwise = nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False)
+    pointwise = nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False)
+    norm = nn.BatchNorm2d(C_out, affine=affine)
+    self.op = nn.Sequential(nn.ReLU(inplace=False), depthwise, pointwise, norm)
+
+  def forward(self, x):
+    out = self.op(x)
+    return out
+
+
+class SepConv(nn.Module):
+  """Two stacked (ReLU, depthwise conv, 1x1 conv, BatchNorm) groups.
+
+  Only the first depthwise convolution uses `stride`; the second always has
+  stride 1. The channel count stays C_in until the final 1x1 convolution
+  maps it to C_out.
+  """
+
+  def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
+    super().__init__()
+
+    def depthwise(step):
+      return nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=step, padding=padding, groups=C_in, bias=False)
+
+    def pointwise(channels):
+      return nn.Conv2d(C_in, channels, kernel_size=1, padding=0, bias=False)
+
+    self.op = nn.Sequential(
+      nn.ReLU(inplace=False),
+      depthwise(stride),
+      pointwise(C_in),
+      nn.BatchNorm2d(C_in, affine=affine),
+      nn.ReLU(inplace=False),
+      depthwise(1),
+      pointwise(C_out),
+      nn.BatchNorm2d(C_out, affine=affine),
+    )
+
+  def forward(self, x):
+    out = self.op(x)
+    return out
+
+
+class Identity(nn.Module):
+  """Module that returns its input unchanged (the same object)."""
+
+  def __init__(self):
+    super().__init__()
+
+  def forward(self, x):
+    passthrough = x
+    return passthrough
+
+
+class Zero(nn.Module):
+  """Outputs zeros; for stride > 1 the last two axes are subsampled first."""
+
+  def __init__(self, stride):
+    super().__init__()
+    self.stride = stride
+
+  def forward(self, x):
+    step = self.stride
+    picked = x if step == 1 else x[:, :, ::step, ::step]
+    return picked.mul(0.)
+
+
+class FactorizedReduce(nn.Module):
+  """Halve the spatial size with two stride-2 1x1 convolutions.
+
+  Each convolution produces C_out // 2 channels (C_out must be even). The
+  second one reads the input shifted by one pixel along both spatial axes;
+  zero padding on the bottom/right keeps both outputs the same size.
+  """
+
+  def __init__(self, C_in, C_out, affine=True):
+    super().__init__()
+    assert C_out % 2 == 0
+    half = C_out // 2
+    self.relu = nn.ReLU(inplace=False)
+    self.conv_1 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
+    self.conv_2 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
+    self.bn = nn.BatchNorm2d(C_out, affine=affine)
+    self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
+
+  def forward(self, x):
+    activated = self.relu(x)
+    # pad one row/column of zeros, then drop the first row/column
+    shifted = self.pad(activated)[:, :, 1:, 1:]
+    halves = [self.conv_1(activated), self.conv_2(shifted)]
+    return self.bn(torch.cat(halves, dim=1))
--- a/others/GDAS/lib/nas_rnn/init.py
+++ b/others/GDAS/lib/nas_rnn/init.py
@@ -0,0 +1,9 @@
+# utils
+from .utils import batchify, get_batch, repackage_hidden
+# models
+from .model_search import RNNModelSearch
+from .model_search import DARTSCellSearch
+from .basemodel import DARTSCell, RNNModel
+# architecture
+from .genotypes import DARTS_V1, DARTS_V2
+from .genotypes import GDAS
--- a/others/GDAS/lib/nas_rnn/basemodel.py
+++ b/others/GDAS/lib/nas_rnn/basemodel.py
@@ -0,0 +1,181 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .genotypes import STEPS
+from .utils import mask2d, LockedDropout, embedded_dropout
+
+
+# Half-width of the uniform range used to initialise weights in this module.
+INITRANGE = 0.04
+
+def none_func(x):
+  """Return `x` multiplied by zero (activation used for the 'none' primitive)."""
+  zeroed = x * 0
+  return zeroed
+
+
+class DARTSCell(nn.Module):
+  """Recurrent cell whose internal wiring is described by a genotype.
+
+  `genotype.recurrent` is a sequence of (activation name, predecessor state
+  index) pairs and `genotype.concat` lists the state indices averaged into
+  the cell output. When `genotype` is None (architecture search) STEPS
+  weight matrices are allocated; `cell` as written here needs a genotype,
+  so a search subclass has to override it.
+  """
+
+  def __init__(self, ninp, nhid, dropouth, dropoutx, genotype):
+    super(DARTSCell, self).__init__()
+    self.nhid = nhid
+    self.dropouth = dropouth
+    self.dropoutx = dropoutx
+    self.genotype = genotype
+
+    # genotype is None when doing arch search
+    steps = len(self.genotype.recurrent) if self.genotype is not None else STEPS
+    # _W0 maps [x, h_prev] to the (gate, candidate) pair of the initial state
+    self._W0 = nn.Parameter(torch.Tensor(ninp+nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE))
+    # one (gate, candidate) projection per intermediate step
+    self._Ws = nn.ParameterList([
+        nn.Parameter(torch.Tensor(nhid, 2*nhid).uniform_(-INITRANGE, INITRANGE)) for i in range(steps)
+    ])
+
+  def forward(self, inputs, hidden, arch_probs):
+    """Run the cell over a whole sequence.
+
+    `inputs` is indexed as (time, batch, feature) and `hidden[0]` is used as
+    the initial state. Returns the per-step states stacked along a new first
+    axis and the last state with a leading axis of size 1.
+    """
+    T, B = inputs.size(0), inputs.size(1)
+
+    if self.training:
+      # One dropout mask per sequence, shared across time steps. The masks are
+      # sampled on the CPU and moved to the device of `inputs`, instead of
+      # unconditionally to CUDA, so the cell also trains on CPU.
+      device = inputs.device
+      x_mask = mask2d(B, inputs.size(2), keep_prob=1.-self.dropoutx, cuda=False).to(device)
+      h_mask = mask2d(B, hidden.size(2), keep_prob=1.-self.dropouth, cuda=False).to(device)
+    else:
+      x_mask = h_mask = None
+
+    hidden = hidden[0]
+    hiddens = []
+    for t in range(T):
+      hidden = self.cell(inputs[t], hidden, x_mask, h_mask, arch_probs)
+      hiddens.append(hidden)
+    hiddens = torch.stack(hiddens)
+    return hiddens, hiddens[-1].unsqueeze(0)
+
+  def _compute_init_state(self, x, h_prev, x_mask, h_mask):
+    """Highway-style update of `h_prev` from the (masked) input and state."""
+    if self.training:
+      xh_prev = torch.cat([x * x_mask, h_prev * h_mask], dim=-1)
+    else:
+      xh_prev = torch.cat([x, h_prev], dim=-1)
+    c0, h0 = torch.split(xh_prev.mm(self._W0), self.nhid, dim=-1)
+    c0 = c0.sigmoid()
+    h0 = h0.tanh()
+    # gate c0 interpolates between the previous state and the candidate h0
+    s0 = h_prev + c0 * (h0-h_prev)
+    return s0
+
+  def _get_activation(self, name):
+    """Return the callable for an activation name; unknown names raise NotImplementedError."""
+    if name == 'tanh':
+      f = torch.tanh
+    elif name == 'relu':
+      f = torch.relu
+    elif name == 'sigmoid':
+      f = torch.sigmoid
+    elif name == 'identity':
+      f = lambda x: x
+    elif name == 'none':
+      f = none_func
+    else:
+      raise NotImplementedError
+    return f
+
+  def cell(self, x, h_prev, x_mask, h_mask, _):
+    """One time step following `self.genotype`; the last argument is unused."""
+    s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
+
+    states = [s0]
+    for i, (name, pred) in enumerate(self.genotype.recurrent):
+      s_prev = states[pred]
+      if self.training:
+        ch = (s_prev * h_mask).mm(self._Ws[i])
+      else:
+        ch = s_prev.mm(self._Ws[i])
+      c, h = torch.split(ch, self.nhid, dim=-1)
+      c = c.sigmoid()
+      fn = self._get_activation(name)
+      h = fn(h)
+      s = s_prev + c * (h-s_prev)
+      states += [s]
+    # average the selected states element-wise
+    output = torch.mean(torch.stack([states[i] for i in self.genotype.concat], -1), -1)
+    return output
+
+
+class RNNModel(nn.Module):
+  """Container module with an encoder, a recurrent module, and a decoder.
+
+  The decoder weight is tied to the encoder (embedding) weight, which is why
+  ninp, nhid and nhidlast must all be equal. `arch_weights` stays None here;
+  when a subclass sets it, `forward` turns it into architecture
+  probabilities with softmax or gumbel-softmax and hands them to the cells.
+  """
+  def __init__(self, ntoken, ninp, nhid, nhidlast,
+                 dropout=0.5, dropouth=0.5, dropoutx=0.5, dropouti=0.5, dropoute=0.1,
+                 cell_cls=None, genotype=None):
+    super(RNNModel, self).__init__()
+    self.lockdrop = LockedDropout()
+    self.encoder = nn.Embedding(ntoken, ninp)
+
+    assert ninp == nhid == nhidlast
+    if cell_cls == DARTSCell:
+      assert genotype is not None
+      rnns = [cell_cls(ninp, nhid, dropouth, dropoutx, genotype)]
+    else:
+      assert genotype is None
+      rnns = [cell_cls(ninp, nhid, dropouth, dropoutx)]
+
+    self.rnns    = torch.nn.ModuleList(rnns)
+    self.decoder = nn.Linear(ninp, ntoken)
+    # weight tying: decoder and encoder share one parameter tensor
+    self.decoder.weight = self.encoder.weight
+    self.init_weights()
+    self.arch_weights = None
+
+    self.ninp = ninp
+    self.nhid = nhid
+    self.nhidlast = nhidlast
+    self.dropout = dropout
+    self.dropouti = dropouti
+    self.dropoute = dropoute
+    self.ntoken = ntoken
+    self.cell_cls = cell_cls
+    # acceleration
+    self.tau = None
+    self.use_gumbel = False
+
+  def set_gumbel(self, use_gumbel, set_check):
+    """Toggle gumbel-softmax sampling and forward `set_check` to every cell.
+
+    Every cell in `self.rnns` must provide a `set_check` method.
+    """
+    self.use_gumbel = use_gumbel
+    for rnn in self.rnns:
+      rnn.set_check(set_check)
+
+  def set_tau(self, tau):
+    self.tau = tau
+
+  def get_tau(self):
+    return self.tau
+
+  def init_weights(self):
+    self.encoder.weight.data.uniform_(-INITRANGE, INITRANGE)
+    self.decoder.bias.data.fill_(0)
+    self.decoder.weight.data.uniform_(-INITRANGE, INITRANGE)
+
+  def forward(self, input, hidden, return_h=False):
+    """Return log-probabilities viewed as (-1, batch, ntoken) and the new hidden list.
+
+    With `return_h=True` the raw and the dropped-out recurrent outputs are
+    returned as two extra lists.
+    """
+    batch_size = input.size(1)
+
+    emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
+    emb = self.lockdrop(emb, self.dropouti)
+
+    raw_output = emb
+    new_hidden = []
+    raw_outputs = []
+    outputs = []
+    if self.arch_weights is None:
+      arch_probs = None
+    else:
+      if self.use_gumbel: arch_probs = F.gumbel_softmax(self.arch_weights, self.tau, False)
+      else              : arch_probs = F.softmax(self.arch_weights, dim=-1)
+
+    for l, rnn in enumerate(self.rnns):
+      raw_output, new_h = rnn(raw_output, hidden[l], arch_probs)
+      new_hidden.append(new_h)
+      raw_outputs.append(raw_output)
+    hidden = new_hidden
+
+    output = self.lockdrop(raw_output, self.dropout)
+    outputs.append(output)
+
+    logit = self.decoder(output.view(-1, self.ninp))
+    log_prob = nn.functional.log_softmax(logit, dim=-1)
+    model_output = log_prob
+    model_output = model_output.view(-1, batch_size, self.ntoken)
+
+    if return_h: return model_output, hidden, raw_outputs, outputs
+    else       : return model_output, hidden
+
+  def init_hidden(self, bsz):
+    """Return a one-element list holding a zero state of shape (1, bsz, nhid)."""
+    # Only dtype and device of a parameter are needed; cloning the whole
+    # tensor (the embedding matrix) for that would be wasted work.
+    reference = next(self.parameters())
+    return [torch.zeros(1, bsz, self.nhid, dtype=reference.dtype, device=reference.device)]
--- a/others/GDAS/lib/nas_rnn/genotypes.py
+++ b/others/GDAS/lib/nas_rnn/genotypes.py
@@ -0,0 +1,55 @@
+from collections import namedtuple
+
+# A recurrent-cell description: `recurrent` holds one (activation name,
+# predecessor index) pair per step, `concat` the state indices to combine.
+Genotype = namedtuple('Genotype', 'recurrent concat')
+
+# Candidate activations for every edge during search; index 0 is 'none'.
+PRIMITIVES = [
+    'none',
+    'tanh',
+    'relu',
+    'sigmoid',
+    'identity'
+]
+# Number of intermediate steps in a searched cell.
+STEPS = 8
+# Number of trailing states combined into the cell output during search.
+CONCAT = 8
+
+# ENAS recurrent cell: 11 steps, each (activation name, predecessor index).
+# Unlike the other genotypes in this file it has more than STEPS entries and
+# combines only a subset of the states.
+ENAS = Genotype(
+    recurrent = [
+        ('tanh', 0),
+        ('tanh', 1),
+        ('relu', 1),
+        ('tanh', 3),
+        ('tanh', 3),
+        ('relu', 3),
+        ('relu', 4),
+        ('relu', 7),
+        ('relu', 8),
+        ('relu', 8),
+        ('relu', 8),
+    ],
+    concat = [2, 5, 6, 9, 10, 11]
+)
+
+# DARTS_V1 recurrent cell: 8 steps of (activation name, predecessor index);
+# states 1..8 are combined for the output.
+DARTS_V1 = Genotype(
+  recurrent = [
+    ('relu', 0),
+    ('relu', 1),
+    ('tanh', 2),
+    ('relu', 3), ('relu', 4), ('identity', 1), ('relu', 5), ('relu', 1)
+  ],
+  concat=range(1, 9)
+)
+
+# DARTS_V2 recurrent cell: 8 steps of (activation name, predecessor index);
+# states 1..8 are combined for the output.
+DARTS_V2 = Genotype(
+  recurrent = [
+    ('sigmoid', 0), ('relu', 1), ('relu', 1),
+    ('identity', 1), ('tanh', 2), ('sigmoid', 5),
+    ('tanh', 3), ('relu', 5)
+  ],
+  concat=range(1, 9)
+)
+
+# GDAS recurrent cell: 8 steps of (activation name, predecessor index);
+# states 1..8 are combined for the output.
+GDAS = Genotype(
+  recurrent=[('relu', 0), ('relu', 0), ('identity', 1), ('relu', 1), ('tanh', 0), ('relu', 2), ('identity', 4), ('identity', 2)],
+  concat=range(1, 9)
+)
+
--- a/others/GDAS/lib/nas_rnn/model_search.py
+++ b/others/GDAS/lib/nas_rnn/model_search.py
@@ -0,0 +1,104 @@
+import copy, torch
+import torch.nn as nn
+import torch.nn.functional as F
+from collections import namedtuple
+from .genotypes import PRIMITIVES, STEPS, CONCAT, Genotype
+from .basemodel import DARTSCell, RNNModel
+
+
+class DARTSCellSearch(DARTSCell):
+  """Search-time cell: every step mixes all predecessor states and all
+  non-'none' activations, weighted by the architecture probabilities."""
+
+  def __init__(self, ninp, nhid, dropouth, dropoutx):
+    super(DARTSCellSearch, self).__init__(ninp, nhid, dropouth, dropoutx, genotype=None)
+    # normalises s0 and every intermediate state; no learnable scale/shift
+    self.bn = nn.BatchNorm1d(nhid, affine=False)
+    self.check_zero = False
+
+  def set_check(self, check_zero):
+    """Enable/disable skipping of edges whose probability is not positive."""
+    self.check_zero = check_zero
+
+  def cell(self, x, h_prev, x_mask, h_mask, arch_probs):
+    """One time step of the mixed cell.
+
+    `arch_probs` is indexed as [edge row, primitive]; step i uses the i+1
+    consecutive rows starting at `offset`, one per already computed state.
+    Returns the mean of the last CONCAT states.
+    """
+    s0 = self._compute_init_state(x, h_prev, x_mask, h_mask)
+    s0 = self.bn(s0)
+    if self.check_zero:
+      # plain Python list so the `> 0` tests below run on host values
+      arch_probs_cpu = arch_probs.cpu().tolist()
+    #arch_probs = F.softmax(self.weights, dim=-1)
+
+    offset = 0
+    # states has shape (number of states so far, batch, nhid)
+    states = s0.unsqueeze(0)
+    for i in range(STEPS):
+      if self.training:
+        masked_states = states * h_mask.unsqueeze(0)
+      else:
+        masked_states = states
+      # project all i+1 existing states with the same step matrix at once
+      ch = masked_states.view(-1, self.nhid).mm(self._Ws[i]).view(i+1, -1, 2*self.nhid)
+      c, h = torch.split(ch, self.nhid, dim=-1)
+      c = c.sigmoid()
+
+      s = torch.zeros_like(s0)
+      for k, name in enumerate(PRIMITIVES):
+        if name == 'none':
+          continue
+        fn = self._get_activation(name)
+        # candidate new state from every predecessor under activation `fn`
+        unweighted = states + c * (fn(h) - states)
+        if self.check_zero:
+          # INDEX: rows of arch_probs with positive weight for primitive k;
+          # INDDX: the matching positions inside `states` / `unweighted`
+          INDEX, INDDX = [], []
+          for jj in range(offset, offset+i+1):
+            if arch_probs_cpu[jj][k] > 0:
+              INDEX.append(jj)
+              INDDX.append(jj-offset)
+          if len(INDEX) == 0: continue
+          s += torch.sum(arch_probs[INDEX, k].unsqueeze(-1).unsqueeze(-1) * unweighted[INDDX, :, :], dim=0)
+        else:
+          s += torch.sum(arch_probs[offset:offset+i+1, k].unsqueeze(-1).unsqueeze(-1) * unweighted, dim=0)
+      s = self.bn(s)
+      states = torch.cat([states, s.unsqueeze(0)], 0)
+      # the next step owns the following i+2 rows of arch_probs
+      offset += i+1
+    output = torch.mean(states[-CONCAT:], dim=0)
+    return output
+
+
+class RNNModelSearch(RNNModel):
+  """RNNModel plus a learnable table of architecture weights.
+
+  The table has one row per (step, predecessor state) pair, i.e.
+  1 + 2 + ... + STEPS rows, and one column per entry of PRIMITIVES.
+  """
+
+  def __init__(self, *args):
+    super(RNNModelSearch, self).__init__(*args)
+    self._args = copy.deepcopy( args )
+
+    num_rows = sum(range(1, STEPS+1))
+    self.arch_weights = nn.Parameter(torch.Tensor(num_rows, len(PRIMITIVES)))
+    nn.init.normal_(self.arch_weights, 0, 0.001)
+
+  def base_parameters(self):
+    """Parameters of lockdrop, encoder, rnns and decoder, in that order."""
+    collected = []
+    for part in (self.lockdrop, self.encoder, self.rnns, self.decoder):
+      collected += list(part.parameters())
+    return collected
+
+  def arch_parameters(self):
+    """The architecture weights, wrapped in a list."""
+    return [self.arch_weights]
+
+  def genotype(self):
+    """Derive a discrete Genotype from the softmax of the architecture weights.
+
+    For every step the predecessor whose row has the largest probability is
+    kept, together with the primitive that attains it ('none' included).
+    Ties go to the lowest index.
+    """
+    with torch.no_grad():
+      probs = F.softmax(self.arch_weights, dim=-1).cpu().numpy()
+
+    recurrent = []
+    first = 0
+    for step in range(STEPS):
+      last = first + step + 1
+      rows = probs[first:last].copy()
+      # max() returns the first maximal item, so ties resolve to the lowest index
+      source = max(range(step + 1), key=lambda r: max(rows[r][c] for c in range(len(rows[r]))))
+      best_op = max(range(len(rows[source])), key=lambda c: rows[source][c])
+      recurrent.append((PRIMITIVES[best_op], source))
+      first = last
+
+    return Genotype(recurrent=recurrent, concat=list(range(STEPS+1)[-CONCAT:]))
--- a/others/GDAS/lib/nas_rnn/utils.py
+++ b/others/GDAS/lib/nas_rnn/utils.py
@@ -0,0 +1,66 @@
+import torch
+import torch.nn as nn
+import os, shutil
+import numpy as np
+
+
+def repackage_hidden(h):
+  """Detach hidden state(s) from the autograd graph.
+
+  A tensor is returned detached; any other iterable is processed
+  recursively and returned as a tuple.
+  """
+  if not isinstance(h, torch.Tensor):
+    return tuple(map(repackage_hidden, h))
+  return h.detach()
+
+
+def batchify(data, bsz, use_cuda):
+  """Cut `data` into `bsz` equal contiguous pieces and lay them out as columns.
+
+  Trailing elements that do not fill a whole column are dropped. The result
+  is moved to CUDA when `use_cuda` is true.
+  """
+  usable = (data.size(0) // bsz) * bsz
+  columns = data.narrow(0, 0, usable).view(bsz, -1).t().contiguous()
+  return columns.cuda() if use_cuda else columns
+
+
+def get_batch(source, i, seq_len):
+  """Return copies of `source[i:i+n]` and of the same window shifted by one.
+
+  `n` is `seq_len`, shortened so that the shifted window stays inside `source`.
+  """
+  length = min(seq_len, len(source) - 1 - i)
+  stop = i + length
+  return source[i:stop].clone(), source[i+1:stop+1].clone()
+
+
+
+def embedded_dropout(embed, words, dropout=0.1, scale=None):
+  """Look up `words` in `embed` after dropping whole rows of its weight.
+
+  Args:
+    embed: an nn.Embedding; its weight and lookup options are used.
+    words: integer index tensor passed to the lookup.
+    dropout: probability of zeroing an entire embedding row; surviving rows
+      are rescaled by 1 / (1 - dropout). A falsy value disables dropout.
+    scale: optional tensor, expandable to the weight's shape, multiplied
+      into the (masked) weight.
+
+  Returns:
+    The embedded tensor, shaped like `words` with one extra trailing axis.
+  """
+  weight = embed.weight
+  if dropout:
+    # one Bernoulli draw per vocabulary row, broadcast over the embedding axis
+    keep = weight.data.new_empty((weight.size(0), 1)).bernoulli_(1 - dropout)
+    mask = keep.expand_as(weight) / (1 - dropout)
+    masked_embed_weight = mask * weight
+  else:
+    masked_embed_weight = weight
+  # `is not None` rather than truthiness: a multi-element tensor has no truth value
+  if scale is not None:
+    masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight
+
+  # Pass padding_idx through unchanged. Replacing None by -1 would make
+  # F.embedding treat the last vocabulary row as padding and zero its gradient.
+  X = torch.nn.functional.embedding(
+        words, masked_embed_weight,
+        embed.padding_idx, embed.max_norm, embed.norm_type,
+        embed.scale_grad_by_freq, embed.sparse)
+  return X
+
+
+class LockedDropout(nn.Module):
+  """Dropout whose mask is sampled once and shared along the first axis of `x`."""
+
+  def __init__(self):
+    super().__init__()
+
+  def forward(self, x, dropout=0.5):
+    if self.training and dropout:
+      # a single mask of shape (1, x.size(1), x.size(2)), rescaled to keep the expectation
+      keep = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
+      scaled = keep.div_(1 - dropout).detach()
+      return scaled.expand_as(x) * x
+    return x
+
+
+def mask2d(B, D, keep_prob, cuda=True):
+  """Sample a (B, D) inverted-dropout mask with entries 0 or 1 / keep_prob.
+
+  The mask is created on the CPU and moved to CUDA unless `cuda` is false.
+  """
+  noise = torch.rand(B, D)
+  # floor(u + keep_prob) is 1 with probability keep_prob, else 0
+  mask = torch.floor(noise + keep_prob) / keep_prob
+  return mask.cuda() if cuda else mask
--- a/others/GDAS/lib/scheduler/init.py
+++ b/others/GDAS/lib/scheduler/init.py
@@ -0,0 +1,5 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .utils import load_config
+from .scheduler import MultiStepLR, obtain_scheduler
--- a/others/GDAS/lib/scheduler/scheduler.py
+++ b/others/GDAS/lib/scheduler/scheduler.py
@@ -0,0 +1,32 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import torch
+from bisect import bisect_right
+
+
+class MultiStepLR(torch.optim.lr_scheduler._LRScheduler):
+  """Step scheduler with an individual decay factor per milestone.
+
+  Args:
+    optimizer: the wrapped optimizer.
+    milestones: non-decreasing epoch indices.
+    gammas: one multiplicative factor per milestone; the factors of all
+      milestones reached so far are multiplied together.
+    last_epoch: forwarded to the base scheduler.
+
+  Raises:
+    ValueError: if `milestones` is not sorted.
+  """
+
+  def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
+    if not list(milestones) == sorted(milestones):
+      # format the message; it used to be passed with an unformatted '{:}'
+      raise ValueError('Milestones should be a list of'
+                       ' increasing integers. Got {:}'.format(milestones))
+    assert len(milestones) == len(gammas), '{:} vs {:}'.format(milestones, gammas)
+    self.milestones = milestones
+    self.gammas = gammas
+    super(MultiStepLR, self).__init__(optimizer, last_epoch)
+
+  def get_lr(self):
+    """Return every base learning rate scaled by the product of the reached gammas."""
+    LR = 1
+    # bisect_right counts the milestones that are <= last_epoch
+    for x in self.gammas[:bisect_right(self.milestones, self.last_epoch)]: LR = LR * x
+    return [base_lr * LR for base_lr in self.base_lrs]
+
+
+def obtain_scheduler(config, optimizer):
+  """Build the scheduler named by `config.type`.
+
+  'multistep' reads `config.milestones` and `config.gammas`; 'cosine' reads
+  `config.epochs`. Any other type raises ValueError.
+  """
+  kind = config.type
+  if kind == 'multistep':
+    return MultiStepLR(optimizer, milestones=config.milestones, gammas=config.gammas)
+  if kind == 'cosine':
+    return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
+  raise ValueError('Unknown learning rate scheduler type : {:}'.format(config.type))
--- a/others/GDAS/lib/scheduler/utils.py
+++ b/others/GDAS/lib/scheduler/utils.py
@@ -0,0 +1,42 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, json
+from pathlib import Path
+from collections import namedtuple
+
+support_types = ('str', 'int', 'bool', 'float')
+
+def convert_param(original_lists):
+  """Convert a `[type name, value]` pair from a config file.
+
+  `value` may be a single item or a list of items; the result has the same
+  structure with every item cast to the named type. 'bool' items are cast
+  through int first, so '0' becomes False.
+  """
+  assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists)
+  ctype, value = original_lists[0], original_lists[1]
+  assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)
+  casters = {
+    'int'  : int,
+    'str'  : str,
+    'bool' : lambda raw: bool(int(raw)),
+    'float': float,
+  }
+
+  def cast(raw):
+    if ctype not in casters:
+      raise TypeError('Does not know this type : {:}'.format(ctype))
+    return casters[ctype](raw)
+
+  if isinstance(value, list):
+    return [cast(item) for item in value]
+  return cast(value)
+
+def load_config(path):
+  """Load a JSON config file into an immutable 'Configure' namedtuple.
+
+  Every top-level value is a `[type name, value]` pair that is converted
+  with convert_param; the keys become the field names.
+  """
+  path = str(path)
+  assert os.path.exists(path), 'Can not find {:}'.format(path)
+  # Reading data back
+  with open(path, 'r') as f:
+    raw = json.load(f)
+  converted = {key: convert_param(spec) for key, spec in raw.items()}
+  Arguments = namedtuple('Configure', ' '.join(converted.keys()))
+  return Arguments(**converted)
--- a/others/GDAS/lib/utils/init.py
+++ b/others/GDAS/lib/utils/init.py
@@ -0,0 +1,16 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+from .utils import AverageMeter, RecorderMeter, convert_secs2time
+from .utils import time_file_str, time_string
+from .utils import test_imagenet_data
+from .utils import print_log
+from .evaluation_utils import obtain_accuracy
+#from .draw_pts import draw_points
+from .gpu_manager import GPUManager
+
+from .save_meta import Save_Meta
+
+from .model_utils import count_parameters_in_MB
+from .model_utils import Cutout
+from .flop_benchmark import print_FLOPs
--- a/others/GDAS/lib/utils/draw_pts.py
+++ b/others/GDAS/lib/utils/draw_pts.py
@@ -0,0 +1,41 @@
+import os, sys, time
+import numpy as np
+import matplotlib
+import random
+matplotlib.use('agg')
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+
+def draw_points(points, labels, save_path):
+  """Scatter-plot the first two columns of `points`, one colour per label.
+
+  Even labels are drawn with 'x' markers, odd ones with 'o'. Legend entries
+  show label + 1, zero-padded to two digits. The figure is written to
+  `save_path` unless it is None, and is always closed afterwards.
+  """
+  title = 'the visualized features'
+  dpi = 100
+  side = 1000 / float(dpi)
+  fig = plt.figure(figsize=(side, side))
+
+  classes = np.unique(labels).tolist()
+  palette = cm.rainbow(np.linspace(0, 1, len(classes)))
+
+  handles, names = [], []
+  for cls, colour in zip(classes, palette):
+    selected = points[labels == cls, :]
+    marker = 'x' if cls % 2 == 0 else 'o'
+    handles.append(plt.scatter(selected[:,0], selected[:,1], color=colour, s=1, marker=marker))
+    names.append('{:02d}'.format(cls+1))
+
+  plt.legend(handles, names, scatterpoints=1, ncol=5, fontsize=8)
+
+  if save_path is not None:
+    fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
+    print ('---- save figure {} into {}'.format(title, save_path))
+  plt.close(fig)
--- a/others/GDAS/lib/utils/evaluation_utils.py
+++ b/others/GDAS/lib/utils/evaluation_utils.py
@@ -0,0 +1,16 @@
+import torch
+
+def obtain_accuracy(output, target, topk=(1,)):
+  """Computes the precision@k for the specified values of k.
+
+  Args:
+    output: score tensor; the top-k is taken along dimension 1.
+    target: tensor of class indices; its first dimension is the batch size.
+    topk: iterable of k values.
+
+  Returns:
+    A list with one single-element tensor per k, holding the percentage of
+    samples whose target is among the k highest-scoring classes.
+  """
+  maxk = max(topk)
+  batch_size = target.size(0)
+
+  _, pred = output.topk(maxk, 1, True, True)
+  pred = pred.t()
+  correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+  res = []
+  for k in topk:
+    # reshape instead of view: this slice of the transposed layout is not
+    # contiguous, and view raises on such tensors
+    correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+    res.append(correct_k.mul_(100.0 / batch_size))
+  return res
--- a/others/GDAS/lib/utils/flop_benchmark.py
+++ b/others/GDAS/lib/utils/flop_benchmark.py
@@ -0,0 +1,116 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+# modified from https://github.com/warmspringwinds/pytorch-segmentation-detection/blob/master/pytorch_segmentation_detection/utils/flops_benchmark.py
+import copy, torch
+
+def print_FLOPs(model, shape, logs):
+  """Log the cost of one forward pass of `model` on a zero input of `shape`.
+
+  Works on a deep copy moved to CUDA, so `model` itself is left untouched.
+  `logs` is a `(print_log, log)` pair; the message is sent as `print_log(msg, log)`.
+  """
+  print_log, log = logs
+  counted = add_flops_counting_methods(copy.deepcopy( model ))
+  counted = counted.cuda()
+  counted.eval()
+
+  dummy = torch.zeros(*shape).cuda()
+  counted(dummy)
+  FLOPs = compute_average_flops_cost( counted ) / 1e6
+  print_log('FLOPs : {:} MB'.format(FLOPs), log)
+  torch.cuda.empty_cache()
+
+
+# ---- Public functions
+def add_flops_counting_methods( model ):
+  """Attach batch and FLOP counters (attributes and forward hooks) to `model` and return it."""
+  model.__batch_counter__ = 0
+  add_batch_counter_hook_function( model )
+  # the counters must exist on a module before its hook fires
+  for visitor in (add_flops_counter_variable_or_reset, add_flops_counter_hook_function):
+    model.apply( visitor )
+  return model
+
+
+
+def compute_average_flops_cost(model):
+  """
+  Return the accumulated `__flops__` of all Conv2d and Linear modules of
+  `model`, divided by `model.__batch_counter__`.
+  Other module types, pooling included, are not part of the sum.
+  """
+  batches_count = model.__batch_counter__
+  counted_types = (torch.nn.Conv2d, torch.nn.Linear)
+  flops_sum = sum(m.__flops__ for m in model.modules() if isinstance(m, counted_types))
+  return flops_sum / batches_count
+
+
+# ---- Internal functions
+def pool_flops_counter_hook(pool_module, inputs, output):
+  """Forward hook: add kernel_size**2 operations per output element to `__flops__`."""
+  first_input = inputs[0]
+  batch_size = first_input.size(0)
+  k = pool_module.kernel_size
+  out_C, output_height, output_width = output.shape[1:]
+  assert out_C == first_input.size(1), '{:} vs. {:}'.format(out_C, first_input.size())
+
+  pool_module.__flops__ += batch_size * out_C * output_height * output_width * k * k
+
+
+def fc_flops_counter_hook(fc_module, inputs, output):
+  """Forward hook: add in*out (plus out when a bias exists) per sample to `__flops__`."""
+  batch_size = inputs[0].size(0)
+  xin, xout = fc_module.in_features, fc_module.out_features
+  assert xin == inputs[0].size(1) and xout == output.size(1), 'IO=({:}, {:})'.format(xin, xout)
+  bias_cost = batch_size * xout if fc_module.bias is not None else 0
+  fc_module.__flops__ += batch_size * xin * xout + bias_cost
+
+
+def conv_flops_counter_hook(conv_module, inputs, output):
+  """Forward hook: add the multiply count of one convolution call to `__flops__`.
+
+  Each output position costs kh * kw * in_channels * out_channels / groups,
+  plus out_channels when the layer has a bias.
+  """
+  kernel_height, kernel_width = conv_module.kernel_size
+  out_channels = conv_module.out_channels
+  per_position = kernel_height * kernel_width * conv_module.in_channels * out_channels / conv_module.groups
+
+  output_height, output_width = output.shape[2:]
+  positions = inputs[0].size(0) * output_height * output_width
+
+  total = per_position * positions
+  if conv_module.bias is not None:
+    total += out_channels * positions
+  conv_module.__flops__ += total
+
+  
+def batch_counter_hook(module, inputs, output):
+  """Forward hook: add the batch size of the first input to `__batch_counter__`."""
+  # Can have multiple inputs, getting the first one
+  first = inputs[0]
+  module.__batch_counter__ += first.shape[0]
+
+
+def add_batch_counter_hook_function(module):
+  """Register batch_counter_hook on `module` once; the handle is kept on the module."""
+  if hasattr(module, '__batch_counter_handle__'):
+    return
+  module.__batch_counter_handle__ = module.register_forward_hook(batch_counter_hook)
+
+  
+def add_flops_counter_variable_or_reset(module):
+  """Set `__flops__` to 0 on Conv2d, Linear, AvgPool2d and MaxPool2d modules."""
+  tracked = (torch.nn.Conv2d, torch.nn.Linear, torch.nn.AvgPool2d, torch.nn.MaxPool2d)
+  if isinstance(module, tracked):
+    module.__flops__ = 0
+
+
+def add_flops_counter_hook_function(module):
+  """Register the matching FLOP-counting forward hook on `module`, at most once.
+
+  Conv2d, Linear and Avg/MaxPool2d modules each get their own hook; any
+  other module type, or a module that already carries `__flops_handle__`,
+  is left alone.
+  """
+  if hasattr(module, '__flops_handle__'):
+    return
+  if isinstance(module, torch.nn.Conv2d):
+    hook = conv_flops_counter_hook
+  elif isinstance(module, torch.nn.Linear):
+    hook = fc_flops_counter_hook
+  elif isinstance(module, (torch.nn.AvgPool2d, torch.nn.MaxPool2d)):
+    hook = pool_flops_counter_hook
+  else:
+    return
+  module.__flops_handle__ = module.register_forward_hook(hook)
--- a/others/GDAS/lib/utils/gpu_manager.py
+++ b/others/GDAS/lib/utils/gpu_manager.py
@@ -0,0 +1,70 @@
+import os
+
+class GPUManager():
+  """Query GPU status through `nvidia-smi` and pick devices by free memory.
+
+  Every lookup shells out to `nvidia-smi --query-gpu=<field>`, one invocation
+  per field listed in `queries`.
+  """
+  # Fields requested from `nvidia-smi --query-gpu`.
+  queries = ('index', 'gpu_name', 'memory.free', 'memory.used', 'memory.total', 'power.draw', 'power.limit')
+
+  def __init__(self):
+    # Run one query up front; when CUDA_VISIBLE_DEVICES is set this triggers
+    # the device-lookup assertions in `query_gpu`. The result is not stored.
+    self.query_gpu(False)
+
+  def get_info(self, ctype):
+    """Return the output lines of `nvidia-smi` for a single query field.
+
+    Args:
+      ctype: field name passed to `--query-gpu` (e.g. 'memory.free').
+
+    Returns:
+      A list with one string per output line, trailing newline removed.
+    """
+    cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(ctype)
+    # Close the pipe deterministically instead of leaving it to the GC.
+    with os.popen(cmd) as pipe:
+      lines = pipe.readlines()
+    return [line.strip('\n') for line in lines]
+
+  def query_gpu(self, show=True):
+    """Collect every field in `queries` for each GPU.
+
+    When CUDA_VISIBLE_DEVICES is set, only the listed devices are kept, in the
+    listed order, and their 'index' is rewritten to the position in that list.
+
+    Args:
+      show: if True return a printable table (one line per GPU); otherwise
+        return a list of dicts mapping field name to its string value.
+
+    Raises:
+      AssertionError: if CUDA_VISIBLE_DEVICES names a device that is missing
+        or that matches more than one reported GPU.
+    """
+    num_gpus = len(self.get_info('index'))
+    all_gpus = [{} for _ in range(num_gpus)]
+    for query in self.queries:
+      for idx, info in enumerate(self.get_info(query)):
+        all_gpus[idx][query] = info
+
+    if 'CUDA_VISIBLE_DEVICES' in os.environ:
+      visible_devices = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
+      selected_gpus = []
+      for idx, visible_device in enumerate(visible_devices):
+        find = False
+        for gpu in all_gpus:
+          if gpu['index'] == visible_device:
+            assert find==False, 'Duplicate cuda device index : {}'.format(visible_device)
+            find = True
+            # Copy so the renumbering does not alter the unfiltered record.
+            selected_gpus.append(gpu.copy())
+            selected_gpus[-1]['index'] = '{}'.format(idx)
+        assert find, 'Does not find the device : {}'.format(visible_device)
+      all_gpus = selected_gpus
+
+    if not show:
+      return all_gpus
+
+    rows = []
+    for gpu in all_gpus:
+      row = '| '
+      for query in self.queries:
+        # Right-align memory columns so the rows line up.
+        if query.startswith('memory'): xinfo = '{:>9}'.format(gpu[query])
+        else:                          xinfo = gpu[query]
+        row = row + query + ' : ' + xinfo + ' | '
+      rows.append(row + '\n')
+    return ''.join(rows)
+
+  def select_by_memory(self, numbers=1):
+    """Return the indexes of the `numbers` GPUs with the most free memory.
+
+    Args:
+      numbers: how many devices to select.
+
+    Returns:
+      The selected device indexes as ints, sorted in ascending order.
+
+    Raises:
+      AssertionError: if more GPUs are requested than are available.
+    """
+    all_gpus = self.query_gpu(False)
+    assert numbers <= len(all_gpus), 'Require {} gpus more than you have'.format(numbers)
+    # 'memory.free' looks like '<amount> <unit>'; keep the integer amount.
+    alls = [(int(gpu['memory.free'].split(' ')[0]), gpu['index']) for gpu in all_gpus]
+    alls.sort(reverse = True)
+    return sorted(int(alls[i][1]) for i in range(numbers))
+
+"""
+if __name__ == '__main__':
+  manager = GPUManager()
+  manager.query_gpu(True)
+  indexes = manager.select_by_memory(3)
+  print (indexes)
+"""
--- a/others/GDAS/lib/utils/model_utils.py
+++ b/others/GDAS/lib/utils/model_utils.py
@@ -0,0 +1,35 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+def count_parameters_in_MB(model):
+  """Return the total number of parameter elements, in millions.
+
+  Despite the name, the value is an element count divided by 1e6, not a size
+  in bytes.
+
+  Args:
+    model: an `nn.Module` (its `parameters()` are counted) or any iterable of
+      objects exposing a `size()` method, such as tensors.
+  """
+  tensors = model.parameters() if isinstance(model, nn.Module) else model
+  # Builtin sum: calling np.sum on a generator is deprecated in NumPy.
+  return sum(np.prod(v.size()) for v in tensors) / 1e6
+
+
+class Cutout(object):
+  """Transform that zeroes one randomly placed square patch of an image tensor."""
+
+  def __init__(self, length):
+    # Side length of the patch before it is clipped at the image borders.
+    self.length = length
+
+  def __repr__(self):
+    return '{name}(length={length})'.format(name=type(self).__name__, length=self.length)
+
+  def __call__(self, img):
+    """Mask `img` in place and return the same tensor.
+
+    `img.size(1)` and `img.size(2)` are used as height and width; the mask is
+    broadcast over the remaining dimension.
+    """
+    height, width = img.size(1), img.size(2)
+    # Draw the patch centre: row first, then column (keeps the RNG order).
+    center_y = np.random.randint(height)
+    center_x = np.random.randint(width)
+    half = self.length // 2
+
+    # Clip the patch bounds so they stay inside the image.
+    top, bottom = np.clip([center_y - half, center_y + half], 0, height)
+    left, right = np.clip([center_x - half, center_x + half], 0, width)
+
+    mask = np.ones((height, width), np.float32)
+    mask[top:bottom, left:right] = 0.
+    img *= torch.from_numpy(mask).expand_as(img)
+    return img
--- a/others/GDAS/lib/utils/save_meta.py
+++ b/others/GDAS/lib/utils/save_meta.py
@@ -0,0 +1,53 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import torch
+import os, sys
+import os.path as osp
+import numpy as np
+
+def tensor2np(x):
+  """Convert a torch tensor to a NumPy array; ndarrays are returned unchanged.
+
+  CUDA tensors are copied to the CPU first. Tensors that require grad are
+  detached, since `Tensor.numpy()` refuses to convert them otherwise.
+  """
+  if isinstance(x, np.ndarray): return x
+  x = x.detach()
+  if x.is_cuda: x = x.cpu()
+  return x.numpy()
+
+class Save_Meta():
+  """Accumulate per-batch predictions and labels and persist them with torch."""
+
+  def __init__(self):
+    self.reset()
+
+  def __repr__(self):
+    return ('{name}'.format(name=self.__class__.__name__)+'(number of data = {})'.format(len(self)))
+
+  def reset(self):
+    """Drop everything recorded so far."""
+    self.predictions = []
+    self.groundtruth = []
+
+  def __len__(self):
+    # Number of appended batches, not the number of individual samples.
+    return len(self.predictions)
+
+  def append(self, _pred, _ground):
+    """Record one batch.
+
+    Args:
+      _pred: 2-D tensor/array of scores, first dimension matching `_ground`.
+      _ground: 1-D tensor/array of labels.
+
+    Raises:
+      AssertionError: if the shapes do not satisfy the constraints above.
+    """
+    _pred, _ground = tensor2np(_pred), tensor2np(_ground)
+    assert _ground.shape[0] == _pred.shape[0] and len(_pred.shape) == 2 and len(_ground.shape) == 1, 'The shapes are wrong : {} & {}'.format(_pred.shape, _ground.shape)
+    self.predictions.append(_pred)
+    self.groundtruth.append(_ground)
+
+  def save(self, save_dir, filename, test=True):
+    """Write the recorded data to `save_dir/filename` and print a summary.
+
+    Args:
+      save_dir: target directory.
+      filename: file name inside `save_dir`.
+      test: if True, also compute the top-1 accuracy (in percent) of the
+        recorded predictions for the printed summary.
+    """
+    meta = {'predictions': self.predictions,
+            'groundtruth': self.groundtruth}
+    filename = osp.join(save_dir, filename)
+    torch.save(meta, filename)
+    # np.concatenate rejects an empty list, so only score when data exists.
+    if test and len(self) > 0:
+      predictions = np.concatenate(self.predictions)
+      groundtruth = np.concatenate(self.groundtruth)
+      predictions = np.argmax(predictions, axis=1)
+      accuracy = np.sum(groundtruth==predictions) * 100.0 / predictions.size
+    else:
+      accuracy = None
+    print ('save save_meta into {} with accuracy = {}'.format(filename, accuracy))
+
+  def load(self, filename):
+    """Replace the recorded data with the content of a file written by `save`.
+
+    Raises:
+      AssertionError: if `filename` is not an existing file.
+    """
+    assert os.path.isfile(filename), '{} is not a file'.format(filename)
+    # NOTE(review): torch.load unpickles the file; only load trusted files.
+    checkpoint       = torch.load(filename)
+    self.predictions = checkpoint['predictions']
+    self.groundtruth = checkpoint['groundtruth']
--- a/others/GDAS/lib/utils/utils.py
+++ b/others/GDAS/lib/utils/utils.py
@@ -0,0 +1,140 @@
+##################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
+##################################################
+import os, sys, time
+import numpy as np
+import random
+
+class AverageMeter(object):
+  """Keep the latest value of a metric together with its weighted running mean."""
+  def __init__(self):
+    self.reset()
+
+  def reset(self):
+    """Set the last value, mean, weighted sum and sample count back to zero."""
+    self.val = self.avg = self.sum = self.count = 0
+
+  def update(self, val, n=1):
+    """Record `val` as observed `n` times and refresh the running mean."""
+    self.val = val
+    self.sum += val * n
+    self.count += n
+    self.avg = self.sum / self.count
+
+
+class RecorderMeter(object):
+  """Store per-epoch train/val loss and accuracy and plot them as curves."""
+  def __init__(self, total_epoch):
+    self.reset(total_epoch)
+
+  def reset(self, total_epoch):
+    """Allocate empty records for `total_epoch` epochs."""
+    assert total_epoch > 0
+    self.total_epoch   = total_epoch
+    self.current_epoch = 0
+    # Both tables are [epoch, train/val]; losses start at -1, accuracies at 0.
+    self.epoch_losses   = np.full((self.total_epoch, 2), -1, dtype=np.float32)
+    self.epoch_accuracy = np.zeros((self.total_epoch, 2), dtype=np.float32)
+
+  def update(self, idx, train_loss, train_acc, val_loss, val_acc):
+    """Store the results of epoch `idx`.
+
+    Returns:
+      Whether this epoch's validation accuracy equals the best validation
+      accuracy among the first `idx + 1` epochs.
+    """
+    assert 0 <= idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
+    self.epoch_losses[idx, 0], self.epoch_losses[idx, 1] = train_loss, val_loss
+    self.epoch_accuracy[idx, 0], self.epoch_accuracy[idx, 1] = train_acc, val_acc
+    self.current_epoch = idx + 1
+    return self.max_accuracy(False) == self.epoch_accuracy[idx, 1]
+
+  def max_accuracy(self, istrain):
+    """Return the best train (or validation) accuracy over the recorded epochs."""
+    if self.current_epoch <= 0: return 0
+    column = 0 if istrain else 1
+    return self.epoch_accuracy[:self.current_epoch, column].max()
+
+  def plot_curve(self, save_path):
+    """Draw accuracy and (x50 scaled) loss curves; save them if a path is given."""
+    import matplotlib
+    matplotlib.use('agg')
+    import matplotlib.pyplot as plt
+    title = 'the accuracy/loss curve of train/val'
+    dpi = 100
+    width, height = 1600, 1000
+    legend_fontsize = 10
+    tick_step_x, tick_step_y = 5, 5
+
+    fig = plt.figure(figsize=(width / float(dpi), height / float(dpi)))
+    epochs = np.arange(self.total_epoch)
+
+    plt.xlim(0, self.total_epoch)
+    plt.ylim(0, 100)
+    plt.xticks(np.arange(0, self.total_epoch + tick_step_x, tick_step_x))
+    plt.yticks(np.arange(0, 100 + tick_step_y, tick_step_y))
+    plt.grid()
+    plt.title(title, fontsize=20)
+    plt.xlabel('the training epoch', fontsize=16)
+    plt.ylabel('accuracy', fontsize=16)
+
+    # (values, scale, color, linestyle, label); losses are stretched by 50 so
+    # they are visible on the 0-100 accuracy axis.
+    curves = (
+      (self.epoch_accuracy[:, 0],  1, 'g', '-', 'train-accuracy'),
+      (self.epoch_accuracy[:, 1],  1, 'y', '-', 'valid-accuracy'),
+      (self.epoch_losses[:, 0],   50, 'g', ':', 'train-loss-x50'),
+      (self.epoch_losses[:, 1],   50, 'y', ':', 'valid-loss-x50'),
+    )
+    for values, scale, color, linestyle, label in curves:
+      plt.plot(epochs, values.astype(np.float64) * scale, color=color, linestyle=linestyle, label=label, lw=2)
+      plt.legend(loc=4, fontsize=legend_fontsize)
+
+    if save_path is not None:
+      fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
+      print ('---- save figure {} into {}'.format(title, save_path))
+    plt.close(fig)
+    
+def print_log(print_string, log):
+  """Print a message and, when `log` is not None, write it there and flush."""
+  message = "{:}".format(print_string)
+  print (message)
+  if log is None:
+    return
+  log.write('{}\n'.format(print_string))
+  log.flush()
+
+def time_file_str():
+  """Return the current UTC date followed by a random suffix in [1, 10000]."""
+  date_format = '%Y-%m-%d'
+  today = time.strftime(date_format, time.gmtime(time.time()))
+  suffix = random.randint(1, 10000)
+  return '{}-{}'.format(today, suffix)
+
+def time_string():
+  """Return the current UTC date and time wrapped in square brackets."""
+  stamp = time.strftime('%Y-%m-%d-%X', time.gmtime(time.time()))
+  return '[{}]'.format(stamp)
+
+def convert_secs2time(epoch_time, return_str=False):
+  """Split a duration in seconds into whole hours, minutes and seconds.
+
+  Args:
+    epoch_time: duration in seconds.
+    return_str: when False return the tuple (hours, minutes, seconds);
+      otherwise return the text '[Need: HH:MM:SS]'.
+  """
+  hours = int(epoch_time / 3600)
+  remainder = epoch_time - 3600*hours
+  minutes = int(remainder / 60)
+  seconds = int(remainder - 60*minutes)
+  parts = (hours, minutes, seconds)
+  if return_str == False:
+    return parts
+  return '[Need: {:02d}:{:02d}:{:02d}]'.format(*parts)
+
+def test_imagenet_data(imagenet):
+  """Sanity-check an ImageNet dataset object.
+
+  Verifies that the dataset length is that of the train (1281166) or val
+  (50000) split, that every class folder maps to a single target, and that
+  each image file name starts with the name of its folder.
+
+  Args:
+    imagenet: object supporting `len()` and exposing `imgs`, an indexable
+      sequence of (path, target) pairs.
+
+  Raises:
+    AssertionError: on the first violated check.
+  """
+  total_length = len(imagenet)
+  assert total_length in (1281166, 50000), 'The length of ImageNet is wrong : {}'.format(total_length)
+  folder_to_target = {}
+  for index in range(total_length):
+    path, target = imagenet.imgs[index]
+    parent_dir, image_name = os.path.split(path)
+    folder = os.path.basename(parent_dir)
+    # The first occurrence of a folder fixes the target it must keep.
+    expected = folder_to_target.setdefault(folder, target)
+    assert expected == target, 'Class : {} is not {}'.format(folder, target)
+    assert image_name.startswith(folder), '{} is wrong.'.format(path)
+  print ('Check ImageNet Dataset OK')