update scripts-cluster
@@ -7,6 +7,7 @@ import torchvision.transforms as transforms
 from utils import Cutout
+from .TieredImageNet import TieredImageNet


 Dataset2Class = {'cifar10' : 10,
                  'cifar100': 100,
                  'tiered'  : -1,
@@ -59,11 +60,11 @@ def get_datasets(name, root, cutout):
   else: raise TypeError("Unknow dataset : {:}".format(name))

   if name == 'cifar10':
-    train_data = dset.CIFAR10(root, train=True, transform=train_transform, download=True)
-    test_data  = dset.CIFAR10(root, train=True, transform=test_transform , download=True)
+    train_data = dset.CIFAR10(root, train=True , transform=train_transform, download=True)
+    test_data  = dset.CIFAR10(root, train=False, transform=test_transform , download=True)
   elif name == 'cifar100':
-    train_data = dset.CIFAR100(root, train=True, transform=train_transform, download=True)
-    test_data  = dset.CIFAR100(root, train=True, transform=test_transform , download=True)
+    train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
+    test_data  = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
   elif name == 'imagenet-1k' or name == 'imagenet-100':
     train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
     test_data  = dset.ImageFolder(osp.join(root, 'val'), train_transform)
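
The fix here: the test loaders were previously constructed with train=True, so reported test accuracy was measured on the training split. A quick sanity check of the corrected loaders (standard torchvision split sizes):

import torchvision.datasets as dset

train_data = dset.CIFAR10('./data', train=True , download=True)
test_data  = dset.CIFAR10('./data', train=False, download=True)
print(len(train_data), len(test_data))   # 50000 10000
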
@@ -1,12 +1,5 @@
 from .model_search import Network
-from .model_search_v1 import NetworkV1
-from .model_search_f1 import NetworkF1
-# acceleration model
-from .model_search_f1_acc2 import NetworkFACC1
-from .model_search_acc2 import NetworkACC2
-from .model_search_v3 import NetworkV3
-from .model_search_v4 import NetworkV4
-from .model_search_v5 import NetworkV5
 from .CifarNet import NetworkCIFAR
 from .ImageNet import NetworkImageNet
@@ -128,7 +128,7 @@ class Transition(nn.Module):

     self.ops2 = nn.ModuleList(
                   [nn.Sequential(
-                     nn.MaxPool2d(3, stride=1, padding=1),
+                     nn.MaxPool2d(3, stride=2, padding=1),
                      nn.BatchNorm2d(C, affine=True)),
                    nn.Sequential(
                      nn.MaxPool2d(3, stride=2, padding=1),
@@ -144,7 +144,8 @@ class Transition(nn.Module):
     if self.training and drop_prob > 0.:
       X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)

-    X2 = self.ops2[0] (X0+X1)
+    #X2 = self.ops2[0] (X0+X1)
+    X2 = self.ops2[0] (s0)
     X3 = self.ops2[1] (s1)
     if self.training and drop_prob > 0.:
       X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
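
drop_path above comes from construct_utils and is not shown in this diff; a minimal sketch of the usual DARTS-style implementation it presumably matches (assumed, not taken from this commit):

import torch

def drop_path(x, drop_prob):
  # Zero an entire sample's path with probability drop_prob and rescale the
  # survivors, keeping the expected value of the output unchanged.
  if drop_prob > 0.:
    keep_prob = 1. - drop_prob
    mask = torch.bernoulli(torch.full((x.size(0), 1, 1, 1), keep_prob, device=x.device))
    x = x / keep_prob * mask
  return x
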
@@ -1,180 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkACC2(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkACC2, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5
    self.use_gumbel = True

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
        else               : weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else               : weights = F.softmax(self.alphas_normal, dim=-1)

      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=gene_reduce, reduce_concat=concat
    )
    return genotype
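
The "acceleration" in NetworkACC2 (model_search_acc2) is the sparse branch of MixedOp.forward: with F.gumbel_softmax(..., hard=True) every row of weights is one-hot (with straight-through gradients), so at most one candidate op per edge is evaluated instead of all of them. A standalone illustration:

import torch
import torch.nn.functional as F

alphas = torch.randn(14, 8) * 0.001          # 14 edges x 8 candidate ops, as in the search cells
hard   = F.gumbel_softmax(alphas, tau=5.0, hard=True)
print((hard.abs() > 1e-10).sum(dim=-1))      # 1 active op per edge -> sparse path
soft   = F.softmax(alphas, dim=-1)
print((soft.abs() > 1e-10).sum(dim=-1))      # 8 active ops per edge -> dense sum
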
@@ -1,167 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkF1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkF1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=None       , reduce_concat=concat
    )
    return genotype
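
_parse in genotype() (identical across all these variants) keeps, for each intermediate node, the two incoming edges whose strongest non-'none' op has the largest weight, then records that op. A toy run with made-up numbers (PRIMITIVES here is a hypothetical three-op subset):

import numpy as np

PRIMITIVES = ['none', 'skip_connect', 'sep_conv_3x3']   # illustrative subset
none_idx   = PRIMITIVES.index('none')
W = np.array([[0.8, 0.1, 0.1],    # edge from state 0: the big weight is on 'none', which is ignored
              [0.1, 0.2, 0.7]])   # edge from state 1
edges = sorted(range(2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != none_idx))[:2]
for j in edges:
  k_best = max((k for k in range(len(W[j])) if k != none_idx), key=lambda k: W[j][k])
  print((PRIMITIVES[k_best], j, float(W[j][k_best])))
# -> ('sep_conv_3x3', 1, 0.7) then ('skip_connect', 0, 0.1)
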
@@ -1,183 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from .operations import OPS, FactorizedReduce, ReLUConvBN
from .construct_utils import Transition
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    use_sum = sum([abs(_) > 1e-10 for _ in cpu_weights])
    if use_sum > 3:
      return sum(w * op(x) for w, op in zip(weights, self._ops))
    else:
      clist = []
      for j, cpu_weight in enumerate(cpu_weights):
        if abs(cpu_weight) > 1e-10:
          clist.append( weights[j] * self._ops[j](x) )
      assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
      return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkFACC1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkFACC1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    self.tau = 5
    self.use_gumbel = True

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      if reduction:
        cell = Transition(C_prev_prev, C_prev, C_curr, reduction_prev, multiplier)
      else:
        cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_gumbel(self, use_gumbel):
    self.use_gumbel = use_gumbel

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        s0, s1 = s1, cell(s0, s1)
      else:
        if self.use_gumbel : weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
        else               : weights = F.softmax(self.alphas_normal, dim=-1)
        s0, s1 = s1, cell(s0, s1, weights)
      #print('{:} : s0 : {:}, s1 : {:}'.format(i, s0.size(), s1.size()))
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      #gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=None       , reduce_concat=concat
    )
    return genotype
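
set_tau/get_tau imply the Gumbel-Softmax temperature is annealed by the training script, which is outside this diff. A plausible linear cooling schedule, purely illustrative:

def tau_at(epoch, total_epochs, tau_max=5.0, tau_min=0.1):
  # Cool the temperature linearly from tau_max to tau_min over the search.
  return tau_max - (tau_max - tau_min) * epoch / max(1, total_epochs - 1)

# e.g. model.set_tau(tau_at(epoch, total_epochs)) at the start of every epoch
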
@@ -1,161 +0,0 @@
# share parameters
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      for j, h in enumerate(states):
        x = self._ops[offset+j](h, weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV1(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV1, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    #self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    #nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    return -1

  def get_tau(self):
    return -1

  def arch_parameters(self):
    return [self.alphas_normal]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_normal, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=gene_reduce, reduce_concat=concat
    )
    return genotype
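
arch_parameters() and base_parameters() split the network for the usual bi-level search: one optimizer updates the weights on training batches, another updates the alphas on held-out batches. Hypothetical optimizer settings for illustration (the real training script is not part of this diff):

import torch

model = NetworkV1(C=16, num_classes=10, layers=8)   # assumes the class is importable
w_optimizer = torch.optim.SGD (model.base_parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
a_optimizer = torch.optim.Adam(model.arch_parameters(), lr=3e-4, betas=(0.5, 0.999), weight_decay=1e-3)
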
@@ -1,171 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    return sum(w * op(x) for w, op in zip(weights, self._ops))


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV3(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV3, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=gene_reduce, reduce_concat=concat
    )
    return genotype
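
random_select and all_select come from construct_utils and are not shown in this diff. From how the cells use them (a 0/1 indicator per candidate state, with sum(indicator) as the normalizer), a compatible sketch would be:

import random

def all_select(n):
  # keep every incoming state
  return [1] * n

def random_select(n, p):
  # keep each state independently with probability p; never return all zeros,
  # since callers divide by sum(indicator)
  indicator = [1 if random.random() < p else 0 for _ in range(n)]
  if sum(indicator) == 0:
    indicator[random.randrange(n)] = 1
  return indicator
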
@@ -1,176 +0,0 @@
# random selection
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    indicators = random_select( len(cpu_weights), 0.5 )
    clist, ws = [], []
    for w, indicator, op in zip(weights, indicators, self._ops):
      if indicator:
        clist.append( w * op(x) )
        ws.append( w )
    return sum(clist) / sum(ws)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0:
        indicator = all_select( len(states) )
      else:
        indicator = random_select( len(states), 0.5 )
      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist) / sum(indicator)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV4(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV4, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.softmax(self.alphas_reduce, dim=-1)
      else:
        weights = F.softmax(self.alphas_normal, dim=-1)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=gene_reduce, reduce_concat=concat
    )
    return genotype
@@ -1,174 +0,0 @@
# gumbel softmax
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from .operations import OPS, FactorizedReduce, ReLUConvBN
from .genotypes import PRIMITIVES, Genotype
from .construct_utils import random_select, all_select


class MixedOp(nn.Module):

  def __init__(self, C, stride):
    super(MixedOp, self).__init__()
    self._ops = nn.ModuleList()
    for primitive in PRIMITIVES:
      op = OPS[primitive](C, stride, False)
      self._ops.append(op)

  def forward(self, x, weights, cpu_weights):
    clist = []
    for j, cpu_weight in enumerate(cpu_weights):
      if abs(cpu_weight) > 1e-10:
        clist.append( weights[j] * self._ops[j](x) )
    assert len(clist) > 0, 'invalid length : {:}'.format(cpu_weights)
    if len(clist) == 1: return clist[0]
    else              : return sum(clist)


class Cell(nn.Module):

  def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    self.reduction = reduction

    if reduction_prev:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
    self._steps = steps
    self._multiplier = multiplier

    self._ops = nn.ModuleList()
    for i in range(self._steps):
      for j in range(2+i):
        stride = 2 if reduction and j < 2 else 1
        op = MixedOp(C, stride)
        self._ops.append(op)

  def forward(self, s0, s1, weights):
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    cpu_weights = weights.tolist()
    states = [s0, s1]
    offset = 0
    for i in range(self._steps):
      clist = []
      if i == 0: indicator = all_select( len(states) )
      else     : indicator = random_select( len(states), 0.6 )

      for j, h in enumerate(states):
        if indicator[j] == 0: continue
        x = self._ops[offset+j](h, weights[offset+j], cpu_weights[offset+j])
        clist.append( x )
      s = sum(clist)
      offset += len(states)
      states.append(s)

    return torch.cat(states[-self._multiplier:], dim=1)


class NetworkV5(nn.Module):

  def __init__(self, C, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3):
    super(NetworkV5, self).__init__()
    self._C = C
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier

    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    reduction_prev, cells = False, []
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      cells.append( cell )
      C_prev_prev, C_prev = C_prev, multiplier*C_curr
    self.cells = nn.ModuleList(cells)

    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)
    self.tau = 5

    # initialize architecture parameters
    k = sum(1 for i in range(self._steps) for n in range(2+i))
    num_ops = len(PRIMITIVES)

    self.alphas_normal = Parameter(torch.Tensor(k, num_ops))
    self.alphas_reduce = Parameter(torch.Tensor(k, num_ops))
    nn.init.normal_(self.alphas_normal, 0, 0.001)
    nn.init.normal_(self.alphas_reduce, 0, 0.001)

  def set_tau(self, tau):
    self.tau = tau

  def get_tau(self):
    return self.tau

  def arch_parameters(self):
    return [self.alphas_normal, self.alphas_reduce]

  def base_parameters(self):
    lists = list(self.stem.parameters()) + list(self.cells.parameters())
    lists += list(self.global_pooling.parameters())
    lists += list(self.classifier.parameters())
    return lists

  def forward(self, inputs):
    batch, C, H, W = inputs.size()
    s0 = s1 = self.stem(inputs)
    for i, cell in enumerate(self.cells):
      if cell.reduction:
        weights = F.gumbel_softmax(self.alphas_reduce, self.tau, True)
      else:
        weights = F.gumbel_softmax(self.alphas_normal, self.tau, True)
      s0, s1 = s1, cell(s0, s1, weights)
    out = self.global_pooling(s1)
    out = out.view(batch, -1)
    logits = self.classifier(out)
    return logits

  def genotype(self):

    def _parse(weights):
      gene, n, start = [], 2, 0
      for i in range(self._steps):
        end = start + n
        W = weights[start:end].copy()
        edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
        for j in edges:
          k_best = None
          for k in range(len(W[j])):
            if k != PRIMITIVES.index('none'):
              if k_best is None or W[j][k] > W[j][k_best]:
                k_best = k
          gene.append((PRIMITIVES[k_best], j, float(W[j][k_best])))
        start = end
        n += 1
      return gene

    with torch.no_grad():
      gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).cpu().numpy())
      gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).cpu().numpy())

    concat = range(2+self._steps-self._multiplier, self._steps+2)
    genotype = Genotype(
      normal=gene_normal, normal_concat=concat,
      reduce=gene_reduce, reduce_concat=concat
    )
    return genotype
@@ -2,6 +2,7 @@ import torch
 import torch.nn as nn
 import numpy as np


 def count_parameters_in_MB(model):
-  return np.sum(np.prod(v.size()) for v in model.parameters())/1e6
+  if isinstance(model, nn.Module):
+    return np.sum(np.prod(v.size()) for v in model.parameters())/1e6
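
The added isinstance check leaves room for inputs that are not an nn.Module (whatever else-branch follows is outside this hunk). A quick check of the nn.Module path:

import torch.nn as nn
print(count_parameters_in_MB(nn.Linear(100, 10)))   # (100*10 + 10) / 1e6 = 0.00101
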