Update TAS and FBV2 for NAS-Bench

2020-07-24 12:56:34 +00:00
parent b9fbe5577c
commit 4a2292a863
8 changed files with 491 additions and 12 deletions
--- a/lib/models/__init__.py
+++ b/lib/models/__init__.py
@@ -12,8 +12,8 @@ __all__ = ['change_key', 'get_cell_based_tiny_net', 'get_search_spaces', 'get_ci

 # useful modules
 from config_utils import dict2config
-from .SharedUtils import change_key
-from .cell_searchs import CellStructure, CellArchitectures
+from models.SharedUtils import change_key
+from models.cell_searchs import CellStructure, CellArchitectures


 # Cell-based NAS Models
@@ -27,6 +27,10 @@ def get_cell_based_tiny_net(config):
      return nas_super_nets[config.name](config.C, config.N, config.max_nodes, config.num_classes, config.space, config.affine, config.track_running_stats)
    except:
      return nas_super_nets[config.name](config.C, config.N, config.max_nodes, config.num_classes, config.space)
+  elif super_type == 'search-shape':
+    from .shape_searchs import GenericNAS301Model
+    genotype = CellStructure.str2structure(config.genotype)
+    return GenericNAS301Model(config.candidate_Cs, config.max_num_Cs, genotype, config.num_classes, config.affine, config.track_running_stats)
  elif super_type == 'nasnet-super':
    from .cell_searchs import nasnet_super_nets as nas_super_nets
    return nas_super_nets[config.name](config.C, config.N, config.steps, config.multiplier, \
--- a/lib/models/cell_infers/cells.py
+++ b/lib/models/cell_infers/cells.py
@@ -5,13 +5,14 @@
 import torch
 import torch.nn as nn
 from copy import deepcopy
-from ..cell_operations import OPS
+
+from models.cell_operations import OPS


 # Cell for NAS-Bench-201
 class InferCell(nn.Module):

-  def __init__(self, genotype, C_in, C_out, stride):
+  def __init__(self, genotype, C_in, C_out, stride, affine=True, track_running_stats=True):
    super(InferCell, self).__init__()

    self.layers  = nn.ModuleList()
@@ -24,9 +25,9 @@ class InferCell(nn.Module):
      cur_innod = []
      for (op_name, op_in) in node_info:
        if op_in == 0:
-          layer = OPS[op_name](C_in , C_out, stride, True, True)
+          layer = OPS[op_name](C_in , C_out, stride, affine, track_running_stats)
        else:
-          layer = OPS[op_name](C_out, C_out,      1, True, True)
+          layer = OPS[op_name](C_out, C_out,      1, affine, track_running_stats)
        cur_index.append( len(self.layers) )
        cur_innod.append( op_in )
        self.layers.append( layer )
--- a/lib/models/cell_operations.py
+++ b/lib/models/cell_operations.py
@@ -74,17 +74,17 @@ class DualSepConv(nn.Module):

 class ResNetBasicblock(nn.Module):

-  def __init__(self, inplanes, planes, stride, affine=True):
+  def __init__(self, inplanes, planes, stride, affine=True, track_running_stats=True):
    super(ResNetBasicblock, self).__init__()
    assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
-    self.conv_a = ReLUConvBN(inplanes, planes, 3, stride, 1, 1, affine)
-    self.conv_b = ReLUConvBN(  planes, planes, 3,      1, 1, 1, affine)
+    self.conv_a = ReLUConvBN(inplanes, planes, 3, stride, 1, 1, affine, track_running_stats)
+    self.conv_b = ReLUConvBN(  planes, planes, 3,      1, 1, 1, affine, track_running_stats)
    if stride == 2:
      self.downsample = nn.Sequential(
                           nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
                           nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False))
    elif inplanes != planes:
-      self.downsample = ReLUConvBN(inplanes, planes, 1, 1, 0, 1, affine)
+      self.downsample = ReLUConvBN(inplanes, planes, 1, 1, 0, 1, affine, track_running_stats)
    else:
      self.downsample = None
    self.in_dim  = inplanes
--- a/lib/models/shape_searchs/__init__.py
+++ b/lib/models/shape_searchs/__init__.py
@@ -6,3 +6,4 @@ from .SearchCifarResNet_depth import SearchDepthCifarResNet
 from .SearchCifarResNet       import SearchShapeCifarResNet
 from .SearchSimResNet_width   import SearchWidthSimResNet
 from .SearchImagenetResNet    import SearchShapeImagenetResNet
+from .generic_size_tiny_cell_model import GenericNAS301Model
--- a/lib/models/shape_searchs/generic_size_tiny_cell_model.py
+++ b/lib/models/shape_searchs/generic_size_tiny_cell_model.py
@@ -0,0 +1,139 @@
+#####################################################
+# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
+#####################################################
+from typing import List, Text, Any
+import random, torch
+import torch.nn as nn
+
+from models.cell_operations import ResNetBasicblock
+from models.cell_infers.cells import InferCell
+from models.shape_searchs.SoftSelect import select2withP, ChannelWiseInter
+
+
+class GenericNAS301Model(nn.Module):
+
+  def __init__(self, candidate_Cs: List[int], max_num_Cs: int, genotype: Any, num_classes: int, affine: bool, track_running_stats: bool):
+    """Build the fixed-topology network whose per-layer widths are searched.
+
+    Every layer is created with max(candidate_Cs) channels; narrower widths
+    are emulated later, in forward().
+
+    Args:
+      candidate_Cs: candidate channel counts.
+      max_num_Cs: number of cells after the stem; must equal 3 * N + 2.
+      genotype: cell structure, handed unchanged to InferCell.
+      num_classes: output size of the final linear classifier.
+      affine: affine flag for the stem BatchNorm, the InferCells and the final BatchNorm.
+      track_running_stats: running-statistics flag, forwarded to the stem
+        BatchNorm, the InferCells, the reduction blocks and the final BatchNorm.
+
+    Raises:
+      ValueError: if max_num_Cs % 3 != 2.
+    """
+    super(GenericNAS301Model, self).__init__()
+    self._max_num_Cs = max_num_Cs
+    self._candidate_Cs = candidate_Cs
+    if max_num_Cs % 3 != 2:
+      raise ValueError('invalid number of layers : {:}'.format(max_num_Cs))
+    self._num_stage = N = max_num_Cs // 3
+    self._max_C = max(candidate_Cs)
+
+    # the convolution only carries a bias when the following BatchNorm has no affine shift
+    stem = nn.Sequential(
+                    nn.Conv2d(3, self._max_C, kernel_size=3, padding=1, bias=not affine),
+                    nn.BatchNorm2d(self._max_C, affine=affine, track_running_stats=track_running_stats))
+
+    # three stages of N normal cells, separated by two reduction blocks: 3 * N + 2 entries
+    layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
+
+    c_prev = self._max_C
+    self._cells = nn.ModuleList()
+    self._cells.append(stem)
+    for reduction in layer_reductions:
+      if reduction:
+        # affine stays True for the residual reduction block, as before; the
+        # running-statistics flag is now forwarded instead of silently defaulting to True.
+        cell = ResNetBasicblock(c_prev, self._max_C, 2, True, track_running_stats)
+      else:
+        cell = InferCell(genotype, c_prev, self._max_C, 1, affine, track_running_stats)
+      self._cells.append(cell)
+      c_prev = cell.out_dim
+    self._num_layer = len(self._cells)
+
+    self.lastact = nn.Sequential(nn.BatchNorm2d(c_prev, affine=affine, track_running_stats=track_running_stats), nn.ReLU(inplace=True))
+    self.global_pooling = nn.AdaptiveAvgPool2d(1)
+    self.classifier = nn.Linear(c_prev, num_classes)
+    # algorithm related
+    self.register_buffer('_tau', torch.zeros(1))
+    self._algo        = None
+
+  def set_algo(self, algo: Text):
+    """Choose the search algorithm and create its architecture parameters.
+
+    Creates `_arch_parameters` with one row per searchable layer and one
+    column per candidate width. For 'fbv2' and 'enas' it also registers the
+    `_masks` buffer, whose i-th row has ones in the first candidate_Cs[i]
+    positions and zeros elsewhere.
+
+    Args:
+      algo: one of 'fbv2', 'enas' or 'tas'.
+
+    Raises:
+      AssertionError: if called a second time or with an unknown algorithm.
+    """
+    # used for searching
+    assert self._algo is None, 'This function can only be called once.'
+    assert algo in ['fbv2', 'enas', 'tas'], 'invalid algo : {:}'.format(algo)
+    self._algo = algo
+    self._arch_parameters = nn.Parameter(1e-3*torch.randn(self._max_num_Cs, len(self._candidate_Cs)))
+    if algo == 'fbv2' or algo == 'enas':
+      self.register_buffer('_masks', torch.zeros(len(self._candidate_Cs), max(self._candidate_Cs)))
+      for i, channels in enumerate(self._candidate_Cs):
+        self._masks.data[i, :channels] = 1
+  
+  # Read-only access to the temperature buffer `_tau`; forward() hands it to
+  # gumbel_softmax ('fbv2') or select2withP ('tas').
+  @property
+  def tau(self):
+    return self._tau
+
+  def set_tau(self, tau):
+    # Overwrite the registered `_tau` buffer in place (slice assignment on .data),
+    # so the buffer object itself is kept.
+    self._tau.data[:] = tau
+
+  @property
+  def weights(self):
+    """Return the network weights as a list, excluding the architecture parameters.
+
+    Parameters are collected from the cells (stem included), the final
+    activation block, the pooling layer and the classifier, in that order.
+    """
+    collected = []
+    for module in (self._cells, self.lastact, self.global_pooling, self.classifier):
+      collected.extend(module.parameters())
+    return collected
+
+  # Architecture parameters wrapped in a one-element list; `_arch_parameters`
+  # only exists after set_algo() has been called.
+  @property
+  def alphas(self):
+    return [self._arch_parameters]
+
+  def show_alphas(self):
+    """Return a printable string with the row-wise softmax of the architecture parameters."""
+    with torch.no_grad():
+      probabilities = nn.functional.softmax(self._arch_parameters, dim=-1)
+      on_cpu = probabilities.cpu()
+      return 'arch-parameters :\n{:}'.format(on_cpu)
+
+  @property
+  def random(self):
+    """Return a random width configuration as a ':'-joined string.
+
+    One candidate is drawn per searchable layer, each via random.randint.
+    """
+    picks = [
+      str(self._candidate_Cs[random.randint(0, len(self._candidate_Cs)-1)])
+      for _ in range(self._max_num_Cs)
+    ]
+    return ':'.join(picks)
+  
+  @property
+  def genotype(self):
+    """Return the currently preferred width configuration as a ':'-joined string.
+
+    For every searchable layer the candidate with the largest architecture
+    parameter is chosen.
+    """
+    with torch.no_grad():
+      best = [self._arch_parameters[row].argmax().item() for row in range(self._max_num_Cs)]
+    return ':'.join(str(self._candidate_Cs[choice]) for choice in best)
+
+  def get_message(self) -> Text:
+    """Return the model summary followed by one line per cell (stem included)."""
+    total = len(self._cells)
+    pieces = [self.extra_repr()]
+    for position, cell in enumerate(self._cells):
+      pieces.append('\n {:02d}/{:02d} :: {:}'.format(position, total, cell.extra_repr()))
+    return ''.join(pieces)
+
+  def extra_repr(self):
+    """Return a one-line summary: candidate widths, layer count, stage size and depth."""
+    template = '{name}(candidates={_candidate_Cs}, num={_max_num_Cs}, N={_num_stage}, L={_num_layer})'
+    # the remaining fields are looked up by name in the instance dictionary
+    return template.format(name=self.__class__.__name__, **self.__dict__)
+
+  def forward(self, inputs):
+    """Run the network, applying the selected width-search rule after every cell.
+
+    'fbv2': the feature map is multiplied by a channel mask that is the
+    Gumbel-softmax-weighted mixture of the rows of `_masks`.
+    'tas': two candidate widths are taken from select2withP, the feature map
+    is truncated to each, brought to the larger width with ChannelWiseInter,
+    mixed with the returned probabilities and zero-padded back to the
+    original channel count.
+
+    Args:
+      inputs: input batch, fed to the stem.
+
+    Returns:
+      A tuple (flattened pooled features, classifier logits).
+
+    Raises:
+      ValueError: if the algorithm is not 'fbv2' or 'tas'. This is checked
+        before any cell is executed.
+    """
+    # NOTE: set_algo() also accepts 'enas', but no forward rule exists for it.
+    if self._algo not in ('fbv2', 'tas'):
+      raise ValueError('invalid algorithm : {:}'.format(self._algo))
+
+    feature = inputs
+    for i, cell in enumerate(self._cells):
+      feature = cell(feature)
+      # the stem (i == 0) and the first cell share row 0 of the architecture parameters
+      idx = max(0, i-1)
+      arch_row = self._arch_parameters[idx:idx+1]
+      if self._algo == 'fbv2':
+        weights = nn.functional.gumbel_softmax(arch_row, tau=self.tau, dim=-1)
+        mask = torch.matmul(weights, self._masks).view(1, -1, 1, 1)
+        feature = feature * mask
+      else:  # 'tas'
+        # presumably returns the two sampled candidate indices and their probabilities
+        selected_cs, selected_probs = select2withP(arch_row, self.tau, num=2)
+        with torch.no_grad():
+          i1, i2 = selected_cs.cpu().view(-1).tolist()
+        c1, c2 = self._candidate_Cs[i1], self._candidate_Cs[i2]
+        out_channel = max(c1, c2)
+        out1 = ChannelWiseInter(feature[:, :c1], out_channel)
+        out2 = ChannelWiseInter(feature[:, :c2], out_channel)
+        out  = out1 * selected_probs[0, 0] + out2 * selected_probs[0, 1]
+        if feature.shape[1] != out.shape[1]:
+          # new_zeros inherits dtype and device from the feature map, so the
+          # padding also matches under half or double precision
+          miss = feature.new_zeros((feature.shape[0], feature.shape[1]-out.shape[1], feature.shape[2], feature.shape[3]))
+          out = torch.cat((out, miss), dim=1)
+        feature = out
+
+    out = self.lastact(feature)
+    out = self.global_pooling(out)
+    out = out.view(out.size(0), -1)
+    logits = self.classifier(out)
+
+    return out, logits