update README
@@ -1,105 +0,0 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet


class Bottleneck(nn.Module):
  def __init__(self, nChannels, growthRate):
    super(Bottleneck, self).__init__()
    interChannels = 4*growthRate
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, bias=False)
    self.bn2 = nn.BatchNorm2d(interChannels)
    self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, padding=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = self.conv2(F.relu(self.bn2(out)))
    out = torch.cat((x, out), 1)
    return out


class SingleLayer(nn.Module):
  def __init__(self, nChannels, growthRate):
    super(SingleLayer, self).__init__()
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = torch.cat((x, out), 1)
    return out


class Transition(nn.Module):
  def __init__(self, nChannels, nOutChannels):
    super(Transition, self).__init__()
    self.bn1 = nn.BatchNorm2d(nChannels)
    self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)

  def forward(self, x):
    out = self.conv1(F.relu(self.bn1(x)))
    out = F.avg_pool2d(out, 2)
    return out


class DenseNet(nn.Module):
  def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
    super(DenseNet, self).__init__()

    if bottleneck: nDenseBlocks = int( (depth-4) / 6 )
    else         : nDenseBlocks = int( (depth-4) / 3 )

    self.message = 'CifarDenseNet : block : {:}, depth : {:}, reduction : {:}, growth-rate = {:}, class = {:}'.format('bottleneck' if bottleneck else 'basic', depth, reduction, growthRate, nClasses)

    nChannels = 2*growthRate
    self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)

    self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    self.trans1 = Transition(nChannels, nOutChannels)

    nChannels = nOutChannels
    self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    self.trans2 = Transition(nChannels, nOutChannels)

    nChannels = nOutChannels
    self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate

    self.act = nn.Sequential(
                 nn.BatchNorm2d(nChannels), nn.ReLU(inplace=True),
                 nn.AvgPool2d(8))
    self.fc = nn.Linear(nChannels, nClasses)

    self.apply(initialize_resnet)

  def get_message(self):
    return self.message

  def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
    layers = []
    for i in range(int(nDenseBlocks)):
      if bottleneck:
        layers.append(Bottleneck(nChannels, growthRate))
      else:
        layers.append(SingleLayer(nChannels, growthRate))
      nChannels += growthRate
    return nn.Sequential(*layers)

  def forward(self, inputs):
    out = self.conv1( inputs )
    out = self.trans1(self.dense1(out))
    out = self.trans2(self.dense2(out))
    out = self.dense3(out)
    features = self.act(out)
    features = features.view(features.size(0), -1)
    out = self.fc(features)
    return features, out
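For orientation, here is a minimal sketch of how this removed CIFAR DenseNet was constructed and called. The import path and the DenseNet-BC-style hyper-parameter values below are illustrative assumptions, not part of the diff:

import torch
from lib.models.CifarDenseNet import DenseNet   # assumed pre-removal package path

# DenseNet-BC-100 style settings for CIFAR-10 (illustrative values)
net = DenseNet(growthRate=12, depth=100, reduction=0.5, nClasses=10, bottleneck=True)
x = torch.randn(2, 3, 32, 32)       # CIFAR-sized input batch
features, logits = net(x)           # forward returns (pooled features, class logits)
print(net.get_message(), features.shape, logits.shape)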
@@ -1,172 +0,0 @@
# Deep Residual Learning for Image Recognition, CVPR 2016
import torch.nn as nn
from .initialization import initialize_resnet


def conv3x3(in_planes, out_planes, stride=1, groups=1):
  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
  return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
    super(BasicBlock, self).__init__()
    if groups != 1 or base_width != 64:
      raise ValueError('BasicBlock only supports groups=1 and base_width=64')
    # Both self.conv1 and self.downsample layers downsample the input when stride != 1
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
      identity = self.downsample(x)

    out += identity
    out = self.relu(out)

    return out


class Bottleneck(nn.Module):
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
    super(Bottleneck, self).__init__()
    width = int(planes * (base_width / 64.)) * groups
    # Both self.conv2 and self.downsample layers downsample the input when stride != 1
    self.conv1 = conv1x1(inplanes, width)
    self.bn1 = nn.BatchNorm2d(width)
    self.conv2 = conv3x3(width, width, stride, groups)
    self.bn2 = nn.BatchNorm2d(width)
    self.conv3 = conv1x1(width, planes * self.expansion)
    self.bn3 = nn.BatchNorm2d(planes * self.expansion)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    if self.downsample is not None:
      identity = self.downsample(x)

    out += identity
    out = self.relu(out)

    return out


class ResNet(nn.Module):

  def __init__(self, block_name, layers, deep_stem, num_classes, zero_init_residual, groups, width_per_group):
    super(ResNet, self).__init__()

    #planes = [int(width_per_group * groups * 2 ** i) for i in range(4)]
    if   block_name == 'BasicBlock' : block = BasicBlock
    elif block_name == 'Bottleneck' : block = Bottleneck
    else                            : raise ValueError('invalid block-name : {:}'.format(block_name))

    if not deep_stem:
      self.conv = nn.Sequential(
                    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(64), nn.ReLU(inplace=True))
    else:
      self.conv = nn.Sequential(
                    nn.Conv2d( 3, 32, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                    nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(64), nn.ReLU(inplace=True))
    self.inplanes = 64
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64 , layers[0], stride=1, groups=groups, base_width=width_per_group)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2, groups=groups, base_width=width_per_group)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2, groups=groups, base_width=width_per_group)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2, groups=groups, base_width=width_per_group)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)
    self.message = 'block = {:}, layers = {:}, deep_stem = {:}, num_classes = {:}'.format(block, layers, deep_stem, num_classes)

    self.apply( initialize_resnet )

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual block behaves like an identity.
    # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
    if zero_init_residual:
      for m in self.modules():
        if isinstance(m, Bottleneck):
          nn.init.constant_(m.bn3.weight, 0)
        elif isinstance(m, BasicBlock):
          nn.init.constant_(m.bn2.weight, 0)

  def _make_layer(self, block, planes, blocks, stride, groups, base_width):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      if stride == 2:
        downsample = nn.Sequential(
          nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
          conv1x1(self.inplanes, planes * block.expansion, 1),
          nn.BatchNorm2d(planes * block.expansion),
        )
      elif stride == 1:
        downsample = nn.Sequential(
          conv1x1(self.inplanes, planes * block.expansion, stride),
          nn.BatchNorm2d(planes * block.expansion),
        )
      else: raise ValueError('invalid stride [{:}] for downsample'.format(stride))

    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample, groups, base_width))
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes, 1, None, groups, base_width))

    return nn.Sequential(*layers)

  def get_message(self):
    return self.message

  def forward(self, x):
    x = self.conv(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    features = self.avgpool(x)
    features = features.view(features.size(0), -1)
    logits = self.fc(features)

    return features, logits
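A similar sketch for the removed ImageNet ResNet. The import path and the ResNet-50-style arguments below are illustrative assumptions:

import torch
from lib.models.ImagenetResNet import ResNet    # assumed pre-removal package path

# ResNet-50-like configuration (illustrative arguments)
net = ResNet(block_name='Bottleneck', layers=[3, 4, 6, 3], deep_stem=False,
             num_classes=1000, zero_init_residual=True, groups=1, width_per_group=64)
x = torch.randn(2, 3, 224, 224)
features, logits = net(x)            # (2, 2048) pooled features and (2, 1000) logits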
@@ -1,101 +0,0 @@
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
from torch import nn
from .initialization import initialize_resnet


class ConvBNReLU(nn.Module):
  def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
    super(ConvBNReLU, self).__init__()
    padding = (kernel_size - 1) // 2
    self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False)
    self.bn = nn.BatchNorm2d(out_planes)
    self.relu = nn.ReLU6(inplace=True)

  def forward(self, x):
    out = self.conv( x )
    out = self.bn  ( out )
    out = self.relu( out )
    return out


class InvertedResidual(nn.Module):
  def __init__(self, inp, oup, stride, expand_ratio):
    super(InvertedResidual, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    hidden_dim = int(round(inp * expand_ratio))
    self.use_res_connect = self.stride == 1 and inp == oup

    layers = []
    if expand_ratio != 1:
      # pw
      layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
    layers.extend([
      # dw
      ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
      # pw-linear
      nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
      nn.BatchNorm2d(oup),
    ])
    self.conv = nn.Sequential(*layers)

  def forward(self, x):
    if self.use_res_connect:
      return x + self.conv(x)
    else:
      return self.conv(x)


class MobileNetV2(nn.Module):
  def __init__(self, num_classes, width_mult, input_channel, last_channel, block_name, dropout):
    super(MobileNetV2, self).__init__()
    if block_name == 'InvertedResidual':
      block = InvertedResidual
    else:
      raise ValueError('invalid block name : {:}'.format(block_name))
    inverted_residual_setting = [
      # t, c, n, s
      [1, 16 , 1, 1],
      [6, 24 , 2, 2],
      [6, 32 , 3, 2],
      [6, 64 , 4, 2],
      [6, 96 , 3, 1],
      [6, 160, 3, 2],
      [6, 320, 1, 1],
    ]

    # building first layer
    input_channel = int(input_channel * width_mult)
    self.last_channel = int(last_channel * max(1.0, width_mult))
    features = [ConvBNReLU(3, input_channel, stride=2)]
    # building inverted residual blocks
    for t, c, n, s in inverted_residual_setting:
      output_channel = int(c * width_mult)
      for i in range(n):
        stride = s if i == 0 else 1
        features.append(block(input_channel, output_channel, stride, expand_ratio=t))
        input_channel = output_channel
    # building last several layers
    features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
    # make it nn.Sequential
    self.features = nn.Sequential(*features)

    # building classifier
    self.classifier = nn.Sequential(
      nn.Dropout(dropout),
      nn.Linear(self.last_channel, num_classes),
    )
    self.message = 'MobileNetV2 : width_mult={:}, in-C={:}, last-C={:}, block={:}, dropout={:}'.format(width_mult, input_channel, last_channel, block_name, dropout)

    # weight initialization
    self.apply( initialize_resnet )

  def get_message(self):
    return self.message

  def forward(self, inputs):
    features = self.features(inputs)
    vectors = features.mean([2, 3])
    predicts = self.classifier(vectors)
    return features, predicts
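A minimal usage sketch for the removed MobileNetV2 wrapper; the import path and argument values are illustrative assumptions (the standard MobileNetV2 stem/head widths are used here):

import torch
from lib.models.MobileNet import MobileNetV2    # assumed pre-removal package path

# conventional MobileNetV2 settings (illustrative arguments)
net = MobileNetV2(num_classes=1000, width_mult=1.0, input_channel=32,
                  last_channel=1280, block_name='InvertedResidual', dropout=0.2)
x = torch.randn(2, 3, 224, 224)
features, predicts = net(x)          # 4-D feature map before pooling, plus (2, 1000) logits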
@@ -1,133 +0,0 @@
import functools

import torch
import torch.nn as nn

__all__ = ['ShuffleNetV2']


def channel_shuffle(x, groups):
  batchsize, num_channels, height, width = x.data.size()
  channels_per_group = num_channels // groups

  # reshape
  x = x.view(batchsize, groups, channels_per_group, height, width)

  x = torch.transpose(x, 1, 2).contiguous()

  # flatten
  x = x.view(batchsize, -1, height, width)

  return x


class InvertedResidual(nn.Module):
  def __init__(self, inp, oup, stride):
    super(InvertedResidual, self).__init__()

    if not (1 <= stride <= 3):
      raise ValueError('illegal stride value')
    self.stride = stride

    branch_features = oup // 2
    assert (self.stride != 1) or (inp == branch_features << 1)

    pw_conv11 = functools.partial(nn.Conv2d, kernel_size=1, stride=1, padding=0, bias=False)
    dw_conv33 = functools.partial(self.depthwise_conv, kernel_size=3, stride=self.stride, padding=1)

    if self.stride > 1:
      self.branch1 = nn.Sequential(
        dw_conv33(inp, inp),
        nn.BatchNorm2d(inp),
        pw_conv11(inp, branch_features),
        nn.BatchNorm2d(branch_features),
        nn.ReLU(inplace=True),
      )

    self.branch2 = nn.Sequential(
      pw_conv11(inp if (self.stride > 1) else branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      nn.ReLU(inplace=True),
      dw_conv33(branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      pw_conv11(branch_features, branch_features),
      nn.BatchNorm2d(branch_features),
      nn.ReLU(inplace=True),
    )

  @staticmethod
  def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
    return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

  def forward(self, x):
    if self.stride == 1:
      x1, x2 = x.chunk(2, dim=1)
      out = torch.cat((x1, self.branch2(x2)), dim=1)
    else:
      out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)

    out = channel_shuffle(out, 2)
    return out


class ShuffleNetV2(nn.Module):
  def __init__(self, num_classes, stages):
    super(ShuffleNetV2, self).__init__()

    self.stage_out_channels = stages
    assert len(stages) == 5, 'invalid stages : {:}'.format(stages)
    self.message = 'stages: ' + ' '.join([str(x) for x in stages])

    input_channels = 3
    output_channels = self.stage_out_channels[0]
    self.conv1 = nn.Sequential(
      nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
      nn.BatchNorm2d(output_channels),
      nn.ReLU(inplace=True),
    )
    input_channels = output_channels

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    stage_names = ['stage{:}'.format(i) for i in [2, 3, 4]]
    stage_repeats = [4, 8, 4]
    for name, repeats, output_channels in zip(
        stage_names, stage_repeats, self.stage_out_channels[1:]):
      seq = [InvertedResidual(input_channels, output_channels, 2)]
      for i in range(repeats - 1):
        seq.append(InvertedResidual(output_channels, output_channels, 1))
      setattr(self, name, nn.Sequential(*seq))
      input_channels = output_channels

    output_channels = self.stage_out_channels[-1]
    self.conv5 = nn.Sequential(
      nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
      nn.BatchNorm2d(output_channels),
      nn.ReLU(inplace=True),
    )

    self.fc = nn.Linear(output_channels, num_classes)

  def get_message(self):
    return self.message

  def forward(self, inputs):
    x = self.conv1( inputs )
    x = self.maxpool(x)
    x = self.stage2(x)
    x = self.stage3(x)
    x = self.stage4(x)
    x = self.conv5(x)
    features = x.mean([2, 3])  # globalpool
    predicts = self.fc(features)
    return features, predicts

  #@staticmethod
  #def _getStages(mult):
  #  stages = {
  #    '0.5': [24, 48, 96 , 192, 1024],
  #    '1.0': [24, 116, 232, 464, 1024],
  #    '1.5': [24, 176, 352, 704, 1024],
  #    '2.0': [24, 244, 488, 976, 2048],
  #  }
  #  return stages[str(mult)]
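A minimal usage sketch for the removed ShuffleNetV2; the import path is an assumption, and the stage widths below simply reuse the '1.0' entry from the commented-out table above:

import torch
from lib.models.ShuffleNetV2 import ShuffleNetV2   # assumed pre-removal package path

net = ShuffleNetV2(num_classes=1000, stages=[24, 116, 232, 464, 1024])
x = torch.randn(2, 3, 224, 224)
features, predicts = net(x)          # (2, 1024) pooled features and (2, 1000) logits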
@@ -11,15 +11,12 @@ from .clone_weights import init_from_model
def get_cifar_models(config):
  from .CifarResNet import CifarResNet
  from .CifarDenseNet import DenseNet
  from .CifarWideResNet import CifarWideResNet

  super_type = getattr(config, 'super_type', 'basic')
  if super_type == 'basic':
    if config.arch == 'resnet':
      return CifarResNet(config.module, config.depth, config.class_num, config.zero_init_residual)
    elif config.arch == 'densenet':
      return DenseNet(config.growthRate, config.depth, config.reduction, config.class_num, config.bottleneck)
    elif config.arch == 'wideresnet':
      return CifarWideResNet(config.depth, config.wide_factor, config.class_num, config.dropout)
    else:
@@ -44,10 +41,8 @@ def get_cifar_models(config):

def get_imagenet_models(config):
  super_type = getattr(config, 'super_type', 'basic')
  if super_type == 'basic':
    return get_imagenet_models_basic(config)
  # NAS searched architecture
  elif super_type.startswith('infer'):
  if super_type.startswith('infer'):
    assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
    infer_mode = super_type.split('-')[1]
    if infer_mode == 'shape':
@@ -65,20 +60,6 @@ def get_imagenet_models(config):
    raise ValueError('invalid super-type : {:}'.format(super_type))


def get_imagenet_models_basic(config):
  from .ImagenetResNet import ResNet
  from .MobileNet import MobileNetV2
  from .ShuffleNetV2 import ShuffleNetV2
  if config.arch == 'resnet':
    return ResNet(config.block_name, config.layers, config.deep_stem, config.class_num, config.zero_init_residual, config.groups, config.width_per_group)
  elif config.arch == 'MobileNetV2':
    return MobileNetV2(config.class_num, config.width_mult, config.input_channel, config.last_channel, config.block_name, config.dropout)
  elif config.arch == 'ShuffleNetV2':
    return ShuffleNetV2(config.class_num, config.stages)
  else:
    raise ValueError('invalid arch : {:}'.format( config.arch ))


def obtain_model(config):
  if config.dataset == 'cifar':
    return get_cifar_models(config)
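The factory functions above dispatch on attributes of a config object; a minimal sketch of the kind of namespace they expect, using only fields referenced in the code (the values are illustrative, not from the diff):

from argparse import Namespace
# hypothetical config selecting the CIFAR DenseNet branch of get_cifar_models
config = Namespace(dataset='cifar', super_type='basic', arch='densenet',
                   growthRate=12, depth=100, reduction=0.5, class_num=10, bottleneck=True)
# net = obtain_model(config)   # would dispatch to get_cifar_models and return a DenseNet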
@@ -1,139 +0,0 @@
# SphereFace: Deep Hypersphere Embedding for Face Recognition
#
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

def myphi(x,m):
  x = x * m
  # truncated Taylor series of cos(m*x)
  return 1-x**2/math.factorial(2)+x**4/math.factorial(4)-x**6/math.factorial(6) + \
         x**8/math.factorial(8) - x**10/math.factorial(10)

class AngleLinear(nn.Module):
  def __init__(self, in_features, out_features, m = 4, phiflag=True):
    super(AngleLinear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.weight = nn.Parameter(torch.Tensor(in_features,out_features))
    self.weight.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
    self.phiflag = phiflag
    self.m = m
    self.mlambda = [
      lambda x: x**0,
      lambda x: x**1,
      lambda x: 2*x**2-1,
      lambda x: 4*x**3-3*x,
      lambda x: 8*x**4-8*x**2+1,
      lambda x: 16*x**5-20*x**3+5*x
    ]

  def forward(self, input):
    x = input        # size=(B,F)        F is feature len
    w = self.weight  # size=(F,Classnum) F=in_features Classnum=out_features

    ww = w.renorm(2,1,1e-5).mul(1e5)
    xlen = x.pow(2).sum(1).pow(0.5)   # size=B
    wlen = ww.pow(2).sum(0).pow(0.5)  # size=Classnum

    cos_theta = x.mm(ww)  # size=(B,Classnum)
    cos_theta = cos_theta / xlen.view(-1,1) / wlen.view(1,-1)
    cos_theta = cos_theta.clamp(-1,1)

    if self.phiflag:
      cos_m_theta = self.mlambda[self.m](cos_theta)
      with torch.no_grad():
        theta = cos_theta.acos()
        k = (self.m*theta/3.14159265).floor()
        n_one = k*0.0 - 1
      phi_theta = (n_one**k) * cos_m_theta - 2*k
    else:
      theta = cos_theta.acos()
      phi_theta = myphi(theta,self.m)
      phi_theta = phi_theta.clamp(-1*self.m,1)

    cos_theta = cos_theta * xlen.view(-1,1)
    phi_theta = phi_theta * xlen.view(-1,1)
    output = (cos_theta,phi_theta)
    return output  # two tensors, each of size=(B,Classnum)


class SphereFace20(nn.Module):
  def __init__(self, classnum=10574):
    super(SphereFace20, self).__init__()
    self.classnum = classnum
    # input = B*3*112*96
    self.conv1_1 = nn.Conv2d(3,64,3,2,1)     #=>B*64*56*48
    self.relu1_1 = nn.PReLU(64)
    self.conv1_2 = nn.Conv2d(64,64,3,1,1)
    self.relu1_2 = nn.PReLU(64)
    self.conv1_3 = nn.Conv2d(64,64,3,1,1)
    self.relu1_3 = nn.PReLU(64)

    self.conv2_1 = nn.Conv2d(64,128,3,2,1)   #=>B*128*28*24
    self.relu2_1 = nn.PReLU(128)
    self.conv2_2 = nn.Conv2d(128,128,3,1,1)
    self.relu2_2 = nn.PReLU(128)
    self.conv2_3 = nn.Conv2d(128,128,3,1,1)
    self.relu2_3 = nn.PReLU(128)

    self.conv2_4 = nn.Conv2d(128,128,3,1,1)  #=>B*128*28*24
    self.relu2_4 = nn.PReLU(128)
    self.conv2_5 = nn.Conv2d(128,128,3,1,1)
    self.relu2_5 = nn.PReLU(128)

    self.conv3_1 = nn.Conv2d(128,256,3,2,1)  #=>B*256*14*12
    self.relu3_1 = nn.PReLU(256)
    self.conv3_2 = nn.Conv2d(256,256,3,1,1)
    self.relu3_2 = nn.PReLU(256)
    self.conv3_3 = nn.Conv2d(256,256,3,1,1)
    self.relu3_3 = nn.PReLU(256)

    self.conv3_4 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_4 = nn.PReLU(256)
    self.conv3_5 = nn.Conv2d(256,256,3,1,1)
    self.relu3_5 = nn.PReLU(256)

    self.conv3_6 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_6 = nn.PReLU(256)
    self.conv3_7 = nn.Conv2d(256,256,3,1,1)
    self.relu3_7 = nn.PReLU(256)

    self.conv3_8 = nn.Conv2d(256,256,3,1,1)  #=>B*256*14*12
    self.relu3_8 = nn.PReLU(256)
    self.conv3_9 = nn.Conv2d(256,256,3,1,1)
    self.relu3_9 = nn.PReLU(256)

    self.conv4_1 = nn.Conv2d(256,512,3,2,1)  #=>B*512*7*6
    self.relu4_1 = nn.PReLU(512)
    self.conv4_2 = nn.Conv2d(512,512,3,1,1)
    self.relu4_2 = nn.PReLU(512)
    self.conv4_3 = nn.Conv2d(512,512,3,1,1)
    self.relu4_3 = nn.PReLU(512)

    self.fc5 = nn.Linear(512*7*6,512)
    self.fc6 = AngleLinear(512, self.classnum)

  def forward(self, x):
    x = self.relu1_1(self.conv1_1(x))
    x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x))))

    x = self.relu2_1(self.conv2_1(x))
    x = x + self.relu2_3(self.conv2_3(self.relu2_2(self.conv2_2(x))))
    x = x + self.relu2_5(self.conv2_5(self.relu2_4(self.conv2_4(x))))

    x = self.relu3_1(self.conv3_1(x))
    x = x + self.relu3_3(self.conv3_3(self.relu3_2(self.conv3_2(x))))
    x = x + self.relu3_5(self.conv3_5(self.relu3_4(self.conv3_4(x))))
    x = x + self.relu3_7(self.conv3_7(self.relu3_6(self.conv3_6(x))))
    x = x + self.relu3_9(self.conv3_9(self.relu3_8(self.conv3_8(x))))

    x = self.relu4_1(self.conv4_1(x))
    x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x))))

    x = x.view(x.size(0),-1)
    features = self.fc5(x)
    logits = self.fc6(features)
    return features, logits
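For the removed SphereFace model, a minimal sketch of the calling convention. Note that AngleLinear returns a (cos_theta, phi_theta) pair, so the second output of SphereFace20 feeds an angular-margin loss rather than plain cross-entropy; the module path and batch shape below are illustrative assumptions:

import torch
from lib.models.SphereFace import SphereFace20   # assumed pre-removal module path

net = SphereFace20(classnum=10574)
x = torch.randn(2, 3, 112, 96)              # aligned face crops, B*3*112*96 as noted above
features, (cos_theta, phi_theta) = net(x)   # 512-d embeddings plus AngleLinear outputs
print(features.shape, cos_theta.shape, phi_theta.shape)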