update README
@@ -1,105 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet


class Bottleneck(nn.Module):
  def __init__(self, nChannels, growthRate):
    super(Bottleneck, self).__init__()
    interChannels = 4*growthRate
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, bias=False)
    self.bn2 = nn.BatchNorm2d(interChannels)
    self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, padding=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = self.conv2(F.relu(self.bn2(out)))
    out = torch.cat((x, out), 1)
    return out


class SingleLayer(nn.Module):
  def __init__(self, nChannels, growthRate):
    super(SingleLayer, self).__init__()
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = torch.cat((x, out), 1)
    return out


class Transition(nn.Module):
  def __init__(self, nChannels, nOutChannels):
    super(Transition, self).__init__()
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = F.avg_pool2d(out, 2)
    return out


class DenseNet(nn.Module):
  def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
    super(DenseNet, self).__init__()

    if bottleneck: nDenseBlocks = int( (depth-4) / 6 )
    else         : nDenseBlocks = int( (depth-4) / 3 )

    self.message = 'CifarDenseNet : block : {:}, depth : {:}, reduction : {:}, growth-rate = {:}, class = {:}'.format('bottleneck' if bottleneck else 'basic', depth, reduction, growthRate, nClasses)

    nChannels = 2*growthRate
    self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)

    self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    self.trans1 = Transition(nChannels, nOutChannels)

    nChannels = nOutChannels
    self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    self.trans2 = Transition(nChannels, nOutChannels)

    nChannels = nOutChannels
    self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate

    self.act = nn.Sequential(
                 nn.BatchNorm2d(nChannels), nn.ReLU(inplace=True),
                 nn.AvgPool2d(8))
    self.fc = nn.Linear(nChannels, nClasses)

    self.apply(initialize_resnet)

  def get_message(self):
    return self.message

  def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
    layers = []
    for i in range(int(nDenseBlocks)):
      if bottleneck:
        layers.append(Bottleneck(nChannels, growthRate))
      else:
        layers.append(SingleLayer(nChannels, growthRate))
      nChannels += growthRate
    return nn.Sequential(*layers)

  def forward(self, inputs):
    out = self.conv1( inputs )
    out = self.trans1(self.dense1(out))
    out = self.trans2(self.dense2(out))
    out = self.dense3(out)
    features = self.act(out)
    features = features.view(features.size(0), -1)
    out = self.fc(features)
    return features, out
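For orientation, here is a minimal sketch of how this removed CIFAR DenseNet was constructed and called. The import path and the DenseNet-BC-style hyper-parameter values below are illustrative assumptions, not part of the diff:

import torch
from lib.models.CifarDenseNet import DenseNet   # assumed pre-removal package path

# DenseNet-BC-100 style settings for CIFAR-10 (illustrative values)
net = DenseNet(growthRate=12, depth=100, reduction=0.5, nClasses=10, bottleneck=True)
x = torch.randn(2, 3, 32, 32)       # CIFAR-sized input batch
features, logits = net(x)           # forward returns (pooled features, class logits)
print(net.get_message(), features.shape, logits.shape)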
@@ -1,172 +0,0 @@
# Deep Residual Learning for Image Recognition, CVPR 2016
import torch.nn as nn
from .initialization import initialize_resnet


def conv3x3(in_planes, out_planes, stride=1, groups=1):
  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
  return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
    super(BasicBlock, self).__init__()
    if groups != 1 or base_width != 64:
      raise ValueError('BasicBlock only supports groups=1 and base_width=64')
    # Both self.conv1 and self.downsample layers downsample the input when stride != 1
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
      identity = self.downsample(x)

    out += identity
    out = self.relu(out)

    return out


class Bottleneck(nn.Module):
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
    super(Bottleneck, self).__init__()
    width = int(planes * (base_width / 64.)) * groups
    # Both self.conv2 and self.downsample layers downsample the input when stride != 1
    self.conv1 = conv1x1(inplanes, width)
    self.bn1 = nn.BatchNorm2d(width)
    self.conv2 = conv3x3(width, width, stride, groups)
    self.bn2 = nn.BatchNorm2d(width)
    self.conv3 = conv1x1(width, planes * self.expansion)
    self.bn3 = nn.BatchNorm2d(planes * self.expansion)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    if self.downsample is not None:
      identity = self.downsample(x)

    out += identity
    out = self.relu(out)

    return out


class ResNet(nn.Module):

  def __init__(self, block_name, layers, deep_stem, num_classes, zero_init_residual, groups, width_per_group):
    super(ResNet, self).__init__()

    #planes = [int(width_per_group * groups * 2 ** i) for i in range(4)]
    if   block_name == 'BasicBlock' : block = BasicBlock
    elif block_name == 'Bottleneck' : block = Bottleneck
    else                            : raise ValueError('invalid block-name : {:}'.format(block_name))

    if not deep_stem:
      self.conv = nn.Sequential(
                    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(64), nn.ReLU(inplace=True))
    else:
      self.conv = nn.Sequential(
                    nn.Conv2d( 3, 32, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                    nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(64), nn.ReLU(inplace=True))
    self.inplanes = 64
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64 , layers[0], stride=1, groups=groups, base_width=width_per_group)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2, groups=groups, base_width=width_per_group)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2, groups=groups, base_width=width_per_group)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2, groups=groups, base_width=width_per_group)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)
    self.message = 'block = {:}, layers = {:}, deep_stem = {:}, num_classes = {:}'.format(block, layers, deep_stem, num_classes)

    self.apply( initialize_resnet )

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual block behaves like an identity.
    # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
    if zero_init_residual:
      for m in self.modules():
        if isinstance(m, Bottleneck):
          nn.init.constant_(m.bn3.weight, 0)
        elif isinstance(m, BasicBlock):
          nn.init.constant_(m.bn2.weight, 0)

  def _make_layer(self, block, planes, blocks, stride, groups, base_width):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      if stride == 2:
        downsample = nn.Sequential(
          nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
          conv1x1(self.inplanes, planes * block.expansion, 1),
          nn.BatchNorm2d(planes * block.expansion),
        )
      elif stride == 1:
        downsample = nn.Sequential(
          conv1x1(self.inplanes, planes * block.expansion, stride),
          nn.BatchNorm2d(planes * block.expansion),
        )
      else: raise ValueError('invalid stride [{:}] for downsample'.format(stride))

    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample, groups, base_width))
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes, 1, None, groups, base_width))

    return nn.Sequential(*layers)

  def get_message(self):
    return self.message

  def forward(self, x):
    x = self.conv(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    features = self.avgpool(x)
    features = features.view(features.size(0), -1)
    logits = self.fc(features)

    return features, logits
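A similar sketch for the removed ImageNet ResNet. The import path and the ResNet-50-style arguments below are illustrative assumptions:

import torch
from lib.models.ImagenetResNet import ResNet    # assumed pre-removal package path

# ResNet-50-like configuration (illustrative arguments)
net = ResNet(block_name='Bottleneck', layers=[3, 4, 6, 3], deep_stem=False,
             num_classes=1000, zero_init_residual=True, groups=1, width_per_group=64)
x = torch.randn(2, 3, 224, 224)
features, logits = net(x)            # (2, 2048) pooled features and (2, 1000) logits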
@@ -1,101 +0,0 @@
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
from torch import nn
from .initialization import initialize_resnet


class ConvBNReLU(nn.Module):
  def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
    super(ConvBNReLU, self).__init__()
    padding = (kernel_size - 1) // 2
    self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False)
    self.bn = nn.BatchNorm2d(out_planes)
    self.relu = nn.ReLU6(inplace=True)

  def forward(self, x):
    out = self.conv( x )
    out = self.bn  ( out )
    out = self.relu( out )
    return out


class InvertedResidual(nn.Module):
  def __init__(self, inp, oup, stride, expand_ratio):
    super(InvertedResidual, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    hidden_dim = int(round(inp * expand_ratio))
    self.use_res_connect = self.stride == 1 and inp == oup

    layers = []
    if expand_ratio != 1:
      # pw
      layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
    layers.extend([
      # dw
      ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
      # pw-linear
      nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
      nn.BatchNorm2d(oup),
    ])
    self.conv = nn.Sequential(*layers)

  def forward(self, x):
    if self.use_res_connect:
      return x + self.conv(x)
    else:
      return self.conv(x)


class MobileNetV2(nn.Module):
  def __init__(self, num_classes, width_mult, input_channel, last_channel, block_name, dropout):
    super(MobileNetV2, self).__init__()
    if block_name == 'InvertedResidual':
      block = InvertedResidual
    else:
      raise ValueError('invalid block name : {:}'.format(block_name))
    inverted_residual_setting = [
      # t, c, n, s
      [1, 16 , 1, 1],
      [6, 24 , 2, 2],
      [6, 32 , 3, 2],
      [6, 64 , 4, 2],
      [6, 96 , 3, 1],
      [6, 160, 3, 2],
      [6, 320, 1, 1],
    ]

    # building first layer
    input_channel = int(input_channel * width_mult)
    self.last_channel = int(last_channel * max(1.0, width_mult))
    features = [ConvBNReLU(3, input_channel, stride=2)]
    # building inverted residual blocks
    for t, c, n, s in inverted_residual_setting:
      output_channel = int(c * width_mult)
      for i in range(n):
        stride = s if i == 0 else 1
        features.append(block(input_channel, output_channel, stride, expand_ratio=t))
        input_channel = output_channel
    # building last several layers
    features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
    # make it nn.Sequential
    self.features = nn.Sequential(*features)

    # building classifier
    self.classifier = nn.Sequential(
      nn.Dropout(dropout),
      nn.Linear(self.last_channel, num_classes),
    )
    self.message = 'MobileNetV2 : width_mult={:}, in-C={:}, last-C={:}, block={:}, dropout={:}'.format(width_mult, input_channel, last_channel, block_name, dropout)

    # weight initialization
    self.apply( initialize_resnet )

  def get_message(self):
    return self.message

  def forward(self, inputs):
    features = self.features(inputs)
    vectors = features.mean([2, 3])
    predicts = self.classifier(vectors)
    return features, predicts
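A minimal usage sketch for the removed MobileNetV2 wrapper; the import path and argument values are illustrative assumptions (the standard MobileNetV2 stem/head widths are used here):

import torch
from lib.models.MobileNet import MobileNetV2    # assumed pre-removal package path

# conventional MobileNetV2 settings (illustrative arguments)
net = MobileNetV2(num_classes=1000, width_mult=1.0, input_channel=32,
                  last_channel=1280, block_name='InvertedResidual', dropout=0.2)
x = torch.randn(2, 3, 224, 224)
features, predicts = net(x)          # 4-D feature map before pooling, plus (2, 1000) logits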
@@ -1,133 +0,0 @@
import functools

import torch
import torch.nn as nn

__all__ = ['ShuffleNetV2']


def channel_shuffle(x, groups):
  batchsize, num_channels, height, width = x.data.size()
  channels_per_group = num_channels // groups

  # reshape
  x = x.view(batchsize, groups, channels_per_group, height, width)

  x = torch.transpose(x, 1, 2).contiguous()

  # flatten
  x = x.view(batchsize, -1, height, width)

  return x


class InvertedResidual(nn.Module):
  def __init__(self, inp, oup, stride):
    super(InvertedResidual, self).__init__()

    if not (1 <= stride <= 3):
      raise ValueError('illegal stride value')
    self.stride = stride

    branch_features = oup // 2
    assert (self.stride != 1) or (inp == branch_features << 1)

    pw_conv11 = functools.partial(nn.Conv2d, kernel_size=1, stride=1, padding=0, bias=False)
    dw_conv33 = functools.partial(self.depthwise_conv, kernel_size=3, stride=self.stride, padding=1)

    if self.stride > 1:
      self.branch1 = nn.Sequential(
        dw_conv33(inp, inp),
        nn.BatchNorm2d(inp),
        pw_conv11(inp, branch_features),
        nn.BatchNorm2d(branch_features),
        nn.ReLU(inplace=True),
      )

    self.branch2 = nn.Sequential(
      pw_conv11(inp if (self.stride > 1) else branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      nn.ReLU(inplace=True),
      dw_conv33(branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      pw_conv11(branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      nn.ReLU(inplace=True),
    )

  @staticmethod
  def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
    return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

  def forward(self, x):
    if self.stride == 1:
      x1, x2 = x.chunk(2, dim=1)
      out = torch.cat((x1, self.branch2(x2)), dim=1)
    else:
      out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)

    out = channel_shuffle(out, 2)
    return out


class ShuffleNetV2(nn.Module):
  def __init__(self, num_classes, stages):
    super(ShuffleNetV2, self).__init__()

    self.stage_out_channels = stages
    assert len(stages) == 5, 'invalid stages : {:}'.format(stages)
    self.message = 'stages: ' + ' '.join([str(x) for x in stages])

    input_channels = 3
    output_channels = self.stage_out_channels[0]
    self.conv1 = nn.Sequential(
      nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
      nn.BatchNorm2d(output_channels),
      nn.ReLU(inplace=True),
    )
    input_channels = output_channels

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    stage_names = ['stage{:}'.format(i) for i in [2, 3, 4]]
    stage_repeats = [4, 8, 4]
    for name, repeats, output_channels in zip(
        stage_names, stage_repeats, self.stage_out_channels[1:]):
      seq = [InvertedResidual(input_channels, output_channels, 2)]
      for i in range(repeats - 1):
        seq.append(InvertedResidual(output_channels, output_channels, 1))
      setattr(self, name, nn.Sequential(*seq))
      input_channels = output_channels

    output_channels = self.stage_out_channels[-1]
    self.conv5 = nn.Sequential(
      nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
      nn.BatchNorm2d(output_channels),
      nn.ReLU(inplace=True),
    )

    self.fc = nn.Linear(output_channels, num_classes)

  def get_message(self):
    return self.message

  def forward(self, inputs):
    x = self.conv1( inputs )
    x = self.maxpool(x)
    x = self.stage2(x)
    x = self.stage3(x)
    x = self.stage4(x)
    x = self.conv5(x)
    features = x.mean([2, 3])  # globalpool
    predicts = self.fc(features)
    return features, predicts

  #@staticmethod
  #def _getStages(mult):
  #  stages = {
  #    '0.5': [24, 48, 96 , 192, 1024],
  #    '1.0': [24, 116, 232, 464, 1024],
  #    '1.5': [24, 176, 352, 704, 1024],
  #    '2.0': [24, 244, 488, 976, 2048],
  #  }
  #  return stages[str(mult)]
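A minimal usage sketch for the removed ShuffleNetV2; the import path is an assumption, and the stage widths below simply reuse the '1.0' entry from the commented-out table above:

import torch
from lib.models.ShuffleNetV2 import ShuffleNetV2   # assumed pre-removal package path

net = ShuffleNetV2(num_classes=1000, stages=[24, 116, 232, 464, 1024])
x = torch.randn(2, 3, 224, 224)
features, predicts = net(x)          # (2, 1024) pooled features and (2, 1000) logits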
@@ -11,15 +11,12 @@ from .clone_weights import init_from_model
def get_cifar_models(config):
  from .CifarResNet import CifarResNet
  from .CifarDenseNet import DenseNet
  from .CifarWideResNet import CifarWideResNet

  super_type = getattr(config, 'super_type', 'basic')
  if super_type == 'basic':
    if config.arch == 'resnet':
      return CifarResNet(config.module, config.depth, config.class_num, config.zero_init_residual)
    elif config.arch == 'densenet':
      return DenseNet(config.growthRate, config.depth, config.reduction, config.class_num, config.bottleneck)
    elif config.arch == 'wideresnet':
      return CifarWideResNet(config.depth, config.wide_factor, config.class_num, config.dropout)
    else:
@@ -44,10 +41,8 @@ def get_cifar_models(config):

def get_imagenet_models(config):
  super_type = getattr(config, 'super_type', 'basic')
  if super_type == 'basic':
    return get_imagenet_models_basic(config)
  # NAS searched architecture
  elif super_type.startswith('infer'):
  if super_type.startswith('infer'):
    assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
    infer_mode = super_type.split('-')[1]
    if infer_mode == 'shape':
@@ -65,20 +60,6 @@ def get_imagenet_models(config):
    raise ValueError('invalid super-type : {:}'.format(super_type))


def get_imagenet_models_basic(config):
  from .ImagenetResNet import ResNet
  from .MobileNet import MobileNetV2
  from .ShuffleNetV2 import ShuffleNetV2
  if config.arch == 'resnet':
    return ResNet(config.block_name, config.layers, config.deep_stem, config.class_num, config.zero_init_residual, config.groups, config.width_per_group)
  elif config.arch == 'MobileNetV2':
    return MobileNetV2(config.class_num, config.width_mult, config.input_channel, config.last_channel, config.block_name, config.dropout)
  elif config.arch == 'ShuffleNetV2':
    return ShuffleNetV2(config.class_num, config.stages)
  else:
    raise ValueError('invalid arch : {:}'.format( config.arch ))


def obtain_model(config):
  if config.dataset == 'cifar':
    return get_cifar_models(config)
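The factory functions above dispatch on attributes of a config object; a minimal sketch of the kind of namespace they expect, using only fields referenced in the code (the values are illustrative, not from the diff):

from argparse import Namespace
# hypothetical config selecting the CIFAR DenseNet branch of get_cifar_models
config = Namespace(dataset='cifar', super_type='basic', arch='densenet',
                   growthRate=12, depth=100, reduction=0.5, class_num=10, bottleneck=True)
# net = obtain_model(config)   # would dispatch to get_cifar_models and return a DenseNet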
@@ -1,139 +0,0 @@
# SphereFace: Deep Hypersphere Embedding for Face Recognition
#
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

def myphi(x,m):
  x = x * m
  # truncated Taylor series of cos(m*x)
  return 1-x**2/math.factorial(2)+x**4/math.factorial(4)-x**6/math.factorial(6) + \
         x**8/math.factorial(8) - x**10/math.factorial(10)

class AngleLinear(nn.Module):
  def __init__(self, in_features, out_features, m = 4, phiflag=True):
    super(AngleLinear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = nn.Parameter(torch.Tensor(in_features,out_features))
    self.weight.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
    self.phiflag = phiflag
    self.m = m
    self.mlambda = [
      lambda x: x**0,
      lambda x: x**1,
      lambda x: 2*x**2-1,
      lambda x: 4*x**3-3*x,
      lambda x: 8*x**4-8*x**2+1,
      lambda x: 16*x**5-20*x**3+5*x
    ]

  def forward(self, input):
    x = input        # size=(B,F)        F is feature len
    w = self.weight  # size=(F,Classnum) F=in_features Classnum=out_features

    ww = w.renorm(2,1,1e-5).mul(1e5)
    xlen = x.pow(2).sum(1).pow(0.5)   # size=B
    wlen = ww.pow(2).sum(0).pow(0.5)  # size=Classnum

    cos_theta = x.mm(ww)  # size=(B,Classnum)
    cos_theta = cos_theta / xlen.view(-1,1) / wlen.view(1,-1)
    cos_theta = cos_theta.clamp(-1,1)

    if self.phiflag:
      cos_m_theta = self.mlambda[self.m](cos_theta)
      with torch.no_grad():
        theta = cos_theta.acos()
        k = (self.m*theta/3.14159265).floor()
        n_one = k*0.0 - 1
      phi_theta = (n_one**k) * cos_m_theta - 2*k
    else:
      theta = cos_theta.acos()
      phi_theta = myphi(theta,self.m)
      phi_theta = phi_theta.clamp(-1*self.m,1)

    cos_theta = cos_theta * xlen.view(-1,1)
    phi_theta = phi_theta * xlen.view(-1,1)
    output = (cos_theta,phi_theta)
    return output  # two tensors, each of size=(B,Classnum)


class SphereFace20(nn.Module):
  def __init__(self, classnum=10574):
    super(SphereFace20, self).__init__()
    self.classnum = classnum
    # input = B*3*112*96
    self.conv1_1 = nn.Conv2d(3,64,3,2,1)     #=>B*64*56*48
    self.relu1_1 = nn.PReLU(64)
    self.conv1_2 = nn.Conv2d(64,64,3,1,1)
    self.relu1_2 = nn.PReLU(64)
    self.conv1_3 = nn.Conv2d(64,64,3,1,1)
    self.relu1_3 = nn.PReLU(64)

    self.conv2_1 = nn.Conv2d(64,128,3,2,1)   #=>B*128*28*24
    self.relu2_1 = nn.PReLU(128)
    self.conv2_2 = nn.Conv2d(128,128,3,1,1)
    self.relu2_2 = nn.PReLU(128)
    self.conv2_3 = nn.Conv2d(128,128,3,1,1)
    self.relu2_3 = nn.PReLU(128)

    self.conv2_4 = nn.Conv2d(128,128,3,1,1)  #=>B*128*28*24
    self.relu2_4 = nn.PReLU(128)
    self.conv2_5 = nn.Conv2d(128,128,3,1,1)
    self.relu2_5 = nn.PReLU(128)

    self.conv3_1 = nn.Conv2d(128,256,3,2,1)  #=>B*256*14*12
    self.relu3_1 = nn.PReLU(256)
    self.conv3_2 = nn.Conv2d(256,256,3,1,1)
    self.relu3_2 = nn.PReLU(256)
    self.conv3_3 = nn.Conv2d(256,256,3,1,1)
    self.relu3_3 = nn.PReLU(256)

    self.conv3_4 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_4 = nn.PReLU(256)
    self.conv3_5 = nn.Conv2d(256,256,3,1,1)
    self.relu3_5 = nn.PReLU(256)

    self.conv3_6 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_6 = nn.PReLU(256)
    self.conv3_7 = nn.Conv2d(256,256,3,1,1)
    self.relu3_7 = nn.PReLU(256)

    self.conv3_8 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_8 = nn.PReLU(256)
    self.conv3_9 = nn.Conv2d(256,256,3,1,1)
    self.relu3_9 = nn.PReLU(256)

    self.conv4_1 = nn.Conv2d(256,512,3,2,1)  #=>B*512*7*6
    self.relu4_1 = nn.PReLU(512)
    self.conv4_2 = nn.Conv2d(512,512,3,1,1)
    self.relu4_2 = nn.PReLU(512)
    self.conv4_3 = nn.Conv2d(512,512,3,1,1)
    self.relu4_3 = nn.PReLU(512)

    self.fc5 = nn.Linear(512*7*6,512)
    self.fc6 = AngleLinear(512, self.classnum)

  def forward(self, x):
    x = self.relu1_1(self.conv1_1(x))
    x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))

    x = self.relu2_1(self.conv2_1(x))
    x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
    x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))

    x = self.relu3_1(self.conv3_1(x))
    x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
    x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
    x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
    x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))

    x = self.relu4_1(self.conv4_1(x))
    x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))

    x = x.view(x.size(0),-1)
    features = self.fc5(x)
    logits = self.fc6(features)
    return features, logits
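For the removed SphereFace model, a minimal sketch of the calling convention. Note that AngleLinear returns a (cos_theta, phi_theta) pair, so the second output of SphereFace20 feeds an angular-margin loss rather than plain cross-entropy; the module path and batch shape below are illustrative assumptions:

import torch
from lib.models.SphereFace import SphereFace20   # assumed pre-removal module path

net = SphereFace20(classnum=10574)
x = torch.randn(2, 3, 112, 96)              # aligned face crops, B*3*112*96 as noted above
features, (cos_theta, phi_theta) = net(x)   # 512-d embeddings plus AngleLinear outputs
print(features.shape, cos_theta.shape, phi_theta.shape)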