add naswot

This commit is contained in:
mhz
2024-07-27 16:40:56 +02:00
parent 55ff19421d
commit 93ced7700d
98 changed files with 13176 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .configure_utils import load_config, dict2config, configure2str
from .basic_args import obtain_basic_args
from .attention_args import obtain_attention_args
from .random_baseline import obtain_RandomSearch_args
from .cls_kd_args import obtain_cls_kd_args
from .cls_init_args import obtain_cls_init_args
from .search_single_args import obtain_search_single_args
from .search_args import obtain_search_args
# for network pruning
from .pruning_args import obtain_pruning_args

View File

@@ -0,0 +1,22 @@
import random, argparse
from .share_args import add_shared_args
def obtain_attention_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--att_channel' , type=int, help='The channel attention option.')
parser.add_argument('--att_spatial' , type=str, help='The spatial attention option.')
parser.add_argument('--att_active'  , type=str, help='The attention activation option.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
return args

View File

@@ -0,0 +1,24 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2020 #
##################################################
import random, argparse
from .share_args import add_shared_args
def obtain_basic_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--model_source', type=str, default='normal', help='The source of the model definition.')
parser.add_argument('--extra_model_path', type=str, default=None, help='The extra model ckp file (help to indicate the searched architecture).')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
return args
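For reference, a minimal sketch (not part of this commit) of how obtain_basic_args is typically driven: it parses sys.argv directly, so the flags are supplied on the command line; every path below is a placeholder.
# hypothetical invocation of a training script that calls obtain_basic_args()
#   python train.py --dataset cifar10 --data_path ./data --save_dir ./output \
#       --model_config configs/model.config --optim_config configs/optim.config --procedure basic
args = obtain_basic_args()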

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,20 @@
import random, argparse
from .share_args import add_shared_args
def obtain_cls_init_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--init_checkpoint', type=str, help='The checkpoint path to the initial model.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
return args

View File

@@ -0,0 +1,23 @@
import random, argparse
from .share_args import add_shared_args
def obtain_cls_kd_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--KD_checkpoint', type=str, help='The teacher checkpoint in knowledge distillation.')
parser.add_argument('--KD_alpha' , type=float, help='The alpha parameter in knowledge distillation.')
parser.add_argument('--KD_temperature', type=float, help='The temperature parameter in knowledge distillation.')
#parser.add_argument('--KD_feature', type=float, help='Knowledge distillation at the feature level.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
return args

View File

@@ -0,0 +1,106 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import os, json
from os import path as osp
from pathlib import Path
from collections import namedtuple
support_types = ('str', 'int', 'bool', 'float', 'none')
def convert_param(original_lists):
assert isinstance(original_lists, list), 'The input must be a list : {:}'.format(original_lists)
ctype, value = original_lists[0], original_lists[1]
assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)
is_list = isinstance(value, list)
if not is_list: value = [value]
outs = []
for x in value:
if ctype == 'int':
x = int(x)
elif ctype == 'str':
x = str(x)
elif ctype == 'bool':
x = bool(int(x))
elif ctype == 'float':
x = float(x)
elif ctype == 'none':
if x.lower() != 'none':
raise ValueError('For the none type, the value must be none instead of {:}'.format(x))
x = None
else:
raise TypeError('Unknown type : {:}'.format(ctype))
outs.append(x)
if not is_list: outs = outs[0]
return outs
def load_config(path, extra, logger):
path = str(path)
if hasattr(logger, 'log'): logger.log(path)
assert os.path.exists(path), 'Can not find {:}'.format(path)
# Reading data back
with open(path, 'r') as f:
data = json.load(f)
content = { k: convert_param(v) for k,v in data.items()}
assert extra is None or isinstance(extra, dict), 'invalid type of extra : {:}'.format(extra)
if isinstance(extra, dict): content = {**content, **extra}
Arguments = namedtuple('Configure', ' '.join(content.keys()))
content = Arguments(**content)
if hasattr(logger, 'log'): logger.log('{:}'.format(content))
return content
def configure2str(config, xpath=None):
if not isinstance(config, dict):
config = config._asdict()
def cstring(x):
return "\"{:}\"".format(x)
def gtype(x):
if isinstance(x, list): x = x[0]
if isinstance(x, str) : return 'str'
elif isinstance(x, bool) : return 'bool'
elif isinstance(x, int): return 'int'
elif isinstance(x, float): return 'float'
elif x is None : return 'none'
else: raise ValueError('invalid : {:}'.format(x))
def cvalue(x, xtype):
if isinstance(x, list): is_list = True
else:
is_list, x = False, [x]
temps = []
for temp in x:
if xtype == 'bool' : temp = cstring(int(temp))
elif xtype == 'none': temp = cstring('None')
else : temp = cstring(temp)
temps.append( temp )
if is_list:
return "[{:}]".format( ', '.join( temps ) )
else:
return temps[0]
xstrings = []
for key, value in config.items():
xtype = gtype(value)
string = ' {:20s} : [{:8s}, {:}]'.format(cstring(key), cstring(xtype), cvalue(value, xtype))
xstrings.append(string)
Fstring = '{\n' + ',\n'.join(xstrings) + '\n}'
if xpath is not None:
parent = Path(xpath).resolve().parent
parent.mkdir(parents=True, exist_ok=True)
if osp.isfile(xpath): os.remove(xpath)
with open(xpath, "w") as text_file:
text_file.write('{:}'.format(Fstring))
return Fstring
def dict2config(xdict, logger):
assert isinstance(xdict, dict), 'invalid type : {:}'.format( type(xdict) )
Arguments = namedtuple('Configure', ' '.join(xdict.keys()))
content = Arguments(**xdict)
if hasattr(logger, 'log'): logger.log('{:}'.format(content))
return content
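A minimal usage sketch (not part of this commit): dict2config wraps a plain dict into the same namedtuple that load_config returns, and configure2str renders values back into the [type, value] pairs that convert_param expects; the keys below are illustrative only.
config = dict2config({'class_num': 10, 'arch': 'resnet'}, logger=None)
print(config.class_num, config.arch)               # attribute-style access
print(configure2str({'LR': 0.1, 'epochs': 200}))   # JSON-like string with typed entries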

View File

@@ -0,0 +1,26 @@
import os, sys, time, random, argparse
from .share_args import add_shared_args
def obtain_pruning_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--keep_ratio' , type=float, help='The ratio of channels kept relative to the original network.')
parser.add_argument('--model_version', type=str, help='The network version.')
parser.add_argument('--KD_alpha' , type=float, help='The alpha parameter in knowledge distillation.')
parser.add_argument('--KD_temperature', type=float, help='The temperature parameter in knowledge distillation.')
parser.add_argument('--Regular_W_feat', type=float, help='The regularization weight on features.')
parser.add_argument('--Regular_W_conv', type=float, help='The regularization weight on convolution layers.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
assert args.keep_ratio > 0 and args.keep_ratio <= 1, 'invalid keep ratio : {:}'.format(args.keep_ratio)
return args

View File

@@ -0,0 +1,24 @@
import os, sys, time, random, argparse
from .share_args import add_shared_args
def obtain_RandomSearch_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--init_model' , type=str, help='The initialization model path.')
parser.add_argument('--expect_flop', type=float, help='The expected FLOPs keep ratio.')
parser.add_argument('--arch_nums' , type=int, help='The maximum number of random architectures to generate.')
parser.add_argument('--model_config', type=str, help='The path to the model configuration')
parser.add_argument('--optim_config', type=str, help='The path to the optimizer configuration')
parser.add_argument('--random_mode', type=str, choices=['random', 'fix'], help='The random sampling mode (random or fix).')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size', type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
#assert args.flop_ratio_min < args.flop_ratio_max, 'flop-ratio {:} vs {:}'.format(args.flop_ratio_min, args.flop_ratio_max)
return args

View File

@@ -0,0 +1,32 @@
import os, sys, time, random, argparse
from .share_args import add_shared_args
def obtain_search_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--model_config' , type=str, help='The path to the model configuration')
parser.add_argument('--optim_config' , type=str, help='The path to the optimizer configuration')
parser.add_argument('--split_path' , type=str, help='The split file path.')
#parser.add_argument('--arch_para_pure', type=int, help='The architecture-parameter pure or not.')
parser.add_argument('--gumbel_tau_max', type=float, help='The maximum tau for Gumbel.')
parser.add_argument('--gumbel_tau_min', type=float, help='The minimum tau for Gumbel.')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--FLOP_ratio' , type=float, help='The expected FLOP ratio.')
parser.add_argument('--FLOP_weight' , type=float, help='The loss weight for FLOP.')
parser.add_argument('--FLOP_tolerant' , type=float, help='The tolerant range for FLOP.')
# ablation studies
parser.add_argument('--ablation_num_select', type=int, help='The number of randomly selected channels.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size' , type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
assert args.gumbel_tau_max is not None and args.gumbel_tau_min is not None
assert args.FLOP_tolerant is not None and args.FLOP_tolerant > 0, 'invalid FLOP_tolerant : {:}'.format(args.FLOP_tolerant)
#assert args.arch_para_pure is not None, 'arch_para_pure is not None: {:}'.format(args.arch_para_pure)
#args.arch_para_pure = bool(args.arch_para_pure)
return args

View File

@@ -0,0 +1,31 @@
import os, sys, time, random, argparse
from .share_args import add_shared_args
def obtain_search_single_args():
parser = argparse.ArgumentParser(description='Train a classification model on typical image classification datasets.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--resume' , type=str, help='Resume path.')
parser.add_argument('--model_config' , type=str, help='The path to the model configuration')
parser.add_argument('--optim_config' , type=str, help='The path to the optimizer configuration')
parser.add_argument('--split_path' , type=str, help='The split file path.')
parser.add_argument('--search_shape' , type=str, help='The shape to be searched.')
#parser.add_argument('--arch_para_pure', type=int, help='The architecture-parameter pure or not.')
parser.add_argument('--gumbel_tau_max', type=float, help='The maximum tau for Gumbel.')
parser.add_argument('--gumbel_tau_min', type=float, help='The minimum tau for Gumbel.')
parser.add_argument('--procedure' , type=str, help='The procedure basic prefix.')
parser.add_argument('--FLOP_ratio' , type=float, help='The expected FLOP ratio.')
parser.add_argument('--FLOP_weight' , type=float, help='The loss weight for FLOP.')
parser.add_argument('--FLOP_tolerant' , type=float, help='The tolerant range for FLOP.')
add_shared_args( parser )
# Optimization options
parser.add_argument('--batch_size' , type=int, default=2, help='Batch size for training.')
args = parser.parse_args()
if args.rand_seed is None or args.rand_seed < 0:
args.rand_seed = random.randint(1, 100000)
assert args.save_dir is not None, 'save_dir argument cannot be None'
assert args.gumbel_tau_max is not None and args.gumbel_tau_min is not None
assert args.FLOP_tolerant is not None and args.FLOP_tolerant > 0, 'invalid FLOP_tolerant : {:}'.format(args.FLOP_tolerant)
#assert args.arch_para_pure is not None, 'arch_para_pure is not None: {:}'.format(args.arch_para_pure)
#args.arch_para_pure = bool(args.arch_para_pure)
return args

View File

@@ -0,0 +1,17 @@
import os, sys, time, random, argparse
def add_shared_args( parser ):
# Data Generation
parser.add_argument('--dataset', type=str, help='The dataset name.')
parser.add_argument('--data_path', type=str, help='The path to the dataset.')
parser.add_argument('--cutout_length', type=int, help='The cutout length, negative means not use.')
# Printing
parser.add_argument('--print_freq', type=int, default=100, help='print frequency (default: 100)')
parser.add_argument('--print_freq_eval', type=int, default=100, help='print frequency (default: 100)')
# Checkpoints
parser.add_argument('--eval_frequency', type=int, default=1, help='evaluation frequency (default: 1)')
parser.add_argument('--save_dir', type=str, help='Folder to save checkpoints and log.')
# Acceleration
parser.add_argument('--workers', type=int, default=8, help='number of data loading workers (default: 8)')
# Random Seed
parser.add_argument('--rand_seed', type=int, default=-1, help='manual seed')
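A minimal sketch (not part of this commit) of how these shared flags compose with a task-specific parser, mirroring the obtain_*_args helpers above; the paths are placeholders.
parser = argparse.ArgumentParser('demo')
add_shared_args(parser)
args = parser.parse_args(['--dataset', 'cifar10', '--data_path', './data',
                          '--save_dir', './output', '--rand_seed', '42'])
print(args.workers, args.print_freq)   # defaults: 8, 100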

View File

@@ -0,0 +1,129 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os, sys, hashlib, torch
import numpy as np
from PIL import Image
import torch.utils.data as data
if sys.version_info[0] == 2:
import cPickle as pickle
else:
import pickle
def calculate_md5(fpath, chunk_size=1024 * 1024):
md5 = hashlib.md5()
with open(fpath, 'rb') as f:
for chunk in iter(lambda: f.read(chunk_size), b''):
md5.update(chunk)
return md5.hexdigest()
def check_md5(fpath, md5, **kwargs):
return md5 == calculate_md5(fpath, **kwargs)
def check_integrity(fpath, md5=None):
if not os.path.isfile(fpath): return False
if md5 is None: return True
else : return check_md5(fpath, md5)
class ImageNet16(data.Dataset):
# http://image-net.org/download-images
# A Downsampled Variant of ImageNet as an Alternative to the CIFAR datasets
# https://arxiv.org/pdf/1707.08819.pdf
train_list = [
['train_data_batch_1', '27846dcaa50de8e21a7d1a35f30f0e91'],
['train_data_batch_2', 'c7254a054e0e795c69120a5727050e3f'],
['train_data_batch_3', '4333d3df2e5ffb114b05d2ffc19b1e87'],
['train_data_batch_4', '1620cdf193304f4a92677b695d70d10f'],
['train_data_batch_5', '348b3c2fdbb3940c4e9e834affd3b18d'],
['train_data_batch_6', '6e765307c242a1b3d7d5ef9139b48945'],
['train_data_batch_7', '564926d8cbf8fc4818ba23d2faac7564'],
['train_data_batch_8', 'f4755871f718ccb653440b9dd0ebac66'],
['train_data_batch_9', 'bb6dd660c38c58552125b1a92f86b5d4'],
['train_data_batch_10','8f03f34ac4b42271a294f91bf480f29b'],
]
valid_list = [
['val_data', '3410e3017fdaefba8d5073aaa65e4bd6'],
]
def __init__(self, root, train, transform, use_num_of_class_only=None):
self.root = root
self.transform = transform
self.train = train # training set or valid set
if not self._check_integrity(): raise RuntimeError('Dataset not found or corrupted.')
if self.train: downloaded_list = self.train_list
else : downloaded_list = self.valid_list
self.data = []
self.targets = []
# now load the picked numpy arrays
for i, (file_name, checksum) in enumerate(downloaded_list):
file_path = os.path.join(self.root, file_name)
#print ('Load {:}/{:02d}-th : {:}'.format(i, len(downloaded_list), file_path))
with open(file_path, 'rb') as f:
if sys.version_info[0] == 2:
entry = pickle.load(f)
else:
entry = pickle.load(f, encoding='latin1')
self.data.append(entry['data'])
self.targets.extend(entry['labels'])
self.data = np.vstack(self.data).reshape(-1, 3, 16, 16)
self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC
if use_num_of_class_only is not None:
assert isinstance(use_num_of_class_only, int) and use_num_of_class_only > 0 and use_num_of_class_only < 1000, 'invalid use_num_of_class_only : {:}'.format(use_num_of_class_only)
new_data, new_targets = [], []
for I, L in zip(self.data, self.targets):
if 1 <= L <= use_num_of_class_only:
new_data.append( I )
new_targets.append( L )
self.data = new_data
self.targets = new_targets
# self.mean.append(entry['mean'])
#self.mean = np.vstack(self.mean).reshape(-1, 3, 16, 16)
#self.mean = np.mean(np.mean(np.mean(self.mean, axis=0), axis=1), axis=1)
#print ('Mean : {:}'.format(self.mean))
#temp = self.data - np.reshape(self.mean, (1, 1, 1, 3))
#std_data = np.std(temp, axis=0)
#std_data = np.mean(np.mean(std_data, axis=0), axis=0)
#print ('Std : {:}'.format(std_data))
def __getitem__(self, index):
img, target = self.data[index], self.targets[index] - 1
img = Image.fromarray(img)
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
return len(self.data)
def _check_integrity(self):
root = self.root
for fentry in (self.train_list + self.valid_list):
filename, md5 = fentry[0], fentry[1]
fpath = os.path.join(root, filename)
if not check_integrity(fpath, md5):
return False
return True
#
if __name__ == '__main__':
train = ImageNet16('/data02/dongxuanyi/.torch/cifar.python/ImageNet16', True , None)
valid = ImageNet16('/data02/dongxuanyi/.torch/cifar.python/ImageNet16', False, None)
print ( len(train) )
print ( len(valid) )
image, label = train[111]
trainX = ImageNet16('/data02/dongxuanyi/.torch/cifar.python/ImageNet16', True , None, 200)
validX = ImageNet16('/data02/dongxuanyi/.torch/cifar.python/ImageNet16', False , None, 200)
print ( len(trainX) )
print ( len(validX) )
#import pdb; pdb.set_trace()

View File

@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
from os import path as osp
from copy import deepcopy as copy
from tqdm import tqdm
import warnings, time, random, numpy as np
from pts_utils import generate_label_map
from xvision import denormalize_points
from xvision import identity2affine, solve2theta, affine2image
from .dataset_utils import pil_loader
from .landmark_utils import PointMeta2V, apply_affine2point, apply_boundary
from .augmentation_utils import CutOut
import torch
import torch.utils.data as data
class LandmarkDataset(data.Dataset):
def __init__(self, transform, sigma, downsample, heatmap_type, shape, use_gray, mean_file, data_indicator, cache_images=None):
self.transform = transform
self.sigma = sigma
self.downsample = downsample
self.heatmap_type = heatmap_type
self.dataset_name = data_indicator
self.shape = shape # [H,W]
self.use_gray = use_gray
assert transform is not None, 'transform : {:}'.format(transform)
self.mean_file = mean_file
if mean_file is None:
self.mean_data = None
warnings.warn('LandmarkDataset initialized with mean_data = None')
else:
assert osp.isfile(mean_file), '{:} is not a file.'.format(mean_file)
self.mean_data = torch.load(mean_file)
self.reset()
self.cutout = None
self.cache_images = cache_images
print ('The general dataset initialization done : {:}'.format(self))
warnings.simplefilter( 'once' )
def __repr__(self):
return ('{name}(point-num={NUM_PTS}, shape={shape}, sigma={sigma}, heatmap_type={heatmap_type}, length={length}, cutout={cutout}, dataset={dataset_name}, mean={mean_file})'.format(name=self.__class__.__name__, **self.__dict__))
def set_cutout(self, length):
if length is not None and length >= 1:
self.cutout = CutOut( int(length) )
else: self.cutout = None
def reset(self, num_pts=-1, boxid='default', only_pts=False):
self.NUM_PTS = num_pts
if only_pts: return
self.length = 0
self.datas = []
self.labels = []
self.NormDistances = []
self.BOXID = boxid
if self.mean_data is None:
self.mean_face = None
else:
self.mean_face = torch.Tensor(self.mean_data[boxid].copy().T)
assert (self.mean_face >= -1).all() and (self.mean_face <= 1).all(), 'mean-{:}-face : {:}'.format(boxid, self.mean_face)
#assert self.dataset_name is not None, 'The dataset name is None'
def __len__(self):
assert len(self.datas) == self.length, 'The length is not correct : {}'.format(self.length)
return self.length
def append(self, data, label, distance):
assert osp.isfile(data), 'The image path is not a file : {:}'.format(data)
self.datas.append( data ) ; self.labels.append( label )
self.NormDistances.append( distance )
self.length = self.length + 1
def load_list(self, file_lists, num_pts, boxindicator, normalizeL, reset):
if reset: self.reset(num_pts, boxindicator)
else : assert self.NUM_PTS == num_pts and self.BOXID == boxindicator, 'The number of points is inconsistent : {:} vs {:}'.format(self.NUM_PTS, num_pts)
if isinstance(file_lists, str): file_lists = [file_lists]
samples = []
for idx, file_path in enumerate(file_lists):
print (':::: load list {:}/{:} : {:}'.format(idx, len(file_lists), file_path))
xdata = torch.load(file_path)
if isinstance(xdata, list) : data = xdata # image or video dataset list
elif isinstance(xdata, dict): data = xdata['datas'] # multi-view dataset list
else: raise ValueError('Invalid Type Error : {:}'.format( type(xdata) ))
samples = samples + data
# samples is a list of annotations; each annotation is a dict that contains 'points' (3, num_pts) and various boxes
print ('GeneralDataset-V2 : {:} samples'.format(len(samples)))
#for index, annotation in enumerate(samples):
for index in tqdm( range( len(samples) ) ):
annotation = samples[index]
image_path = annotation['current_frame']
points, box = annotation['points'], annotation['box-{:}'.format(boxindicator)]
label = PointMeta2V(self.NUM_PTS, points, box, image_path, self.dataset_name)
if normalizeL is None: normDistance = None
else : normDistance = annotation['normalizeL-{:}'.format(normalizeL)]
self.append(image_path, label, normDistance)
assert len(self.datas) == self.length, 'The length and the data are inconsistent : {} vs {}'.format(self.length, len(self.datas))
assert len(self.labels) == self.length, 'The length and the labels are inconsistent : {} vs {}'.format(self.length, len(self.labels))
assert len(self.NormDistances) == self.length, 'The length and the NormDistances are inconsistent : {} vs {}'.format(self.length, len(self.NormDistances))
print ('Load data done for LandmarkDataset, which has {:} images.'.format(self.length))
def __getitem__(self, index):
assert index >= 0 and index < self.length, 'Invalid index : {:}'.format(index)
if self.cache_images is not None and self.datas[index] in self.cache_images:
image = self.cache_images[ self.datas[index] ].clone()
else:
image = pil_loader(self.datas[index], self.use_gray)
target = self.labels[index].copy()
return self._process_(image, target, index)
def _process_(self, image, target, index):
# transform the image and points
image, target, theta = self.transform(image, target)
(C, H, W), (height, width) = image.size(), self.shape
# obtain the visible indicator vector
if target.is_none(): nopoints = True
else : nopoints = False
if index == -1: __path = None
else : __path = self.datas[index]
if isinstance(theta, list) or isinstance(theta, tuple):
affineImage, heatmaps, mask, norm_trans_points, THETA, transpose_theta = [], [], [], [], [], []
for _theta in theta:
_affineImage, _heatmaps, _mask, _norm_trans_points, _theta, _transpose_theta \
= self.__process_affine(image, target, _theta, nopoints, 'P[{:}]@{:}'.format(index, __path))
affineImage.append(_affineImage)
heatmaps.append(_heatmaps)
mask.append(_mask)
norm_trans_points.append(_norm_trans_points)
THETA.append(_theta)
transpose_theta.append(_transpose_theta)
affineImage, heatmaps, mask, norm_trans_points, THETA, transpose_theta = \
torch.stack(affineImage), torch.stack(heatmaps), torch.stack(mask), torch.stack(norm_trans_points), torch.stack(THETA), torch.stack(transpose_theta)
else:
affineImage, heatmaps, mask, norm_trans_points, THETA, transpose_theta = self.__process_affine(image, target, theta, nopoints, 'S[{:}]@{:}'.format(index, __path))
torch_index = torch.IntTensor([index])
torch_nopoints = torch.ByteTensor( [ nopoints ] )
torch_shape = torch.IntTensor([H,W])
return affineImage, heatmaps, mask, norm_trans_points, THETA, transpose_theta, torch_index, torch_nopoints, torch_shape
def __process_affine(self, image, target, theta, nopoints, aux_info=None):
image, target, theta = image.clone(), target.copy(), theta.clone()
(C, H, W), (height, width) = image.size(), self.shape
if nopoints: # do not have label
norm_trans_points = torch.zeros((3, self.NUM_PTS))
heatmaps = torch.zeros((self.NUM_PTS+1, height//self.downsample, width//self.downsample))
mask = torch.ones((self.NUM_PTS+1, 1, 1), dtype=torch.uint8)
transpose_theta = identity2affine(False)
else:
norm_trans_points = apply_affine2point(target.get_points(), theta, (H,W))
norm_trans_points = apply_boundary(norm_trans_points)
real_trans_points = norm_trans_points.clone()
real_trans_points[:2, :] = denormalize_points(self.shape, real_trans_points[:2,:])
heatmaps, mask = generate_label_map(real_trans_points.numpy(), height//self.downsample, width//self.downsample, self.sigma, self.downsample, nopoints, self.heatmap_type) # H*W*C
heatmaps = torch.from_numpy(heatmaps.transpose((2, 0, 1))).type(torch.FloatTensor)
mask = torch.from_numpy(mask.transpose((2, 0, 1))).type(torch.ByteTensor)
if self.mean_face is None:
#warnings.warn('In LandmarkDataset use identity2affine for transpose_theta because self.mean_face is None.')
transpose_theta = identity2affine(False)
else:
if torch.sum(norm_trans_points[2,:] == 1) < 3:
warnings.warn('In LandmarkDataset after transformation, no visible point, using identity instead. Aux: {:}'.format(aux_info))
transpose_theta = identity2affine(False)
else:
transpose_theta = solve2theta(norm_trans_points, self.mean_face.clone())
affineImage = affine2image(image, theta, self.shape)
if self.cutout is not None: affineImage = self.cutout( affineImage )
return affineImage, heatmaps, mask, norm_trans_points, theta, transpose_theta

View File

@@ -0,0 +1,46 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import torch, copy, random
import torch.utils.data as data
class SearchDataset(data.Dataset):
def __init__(self, name, data, train_split, valid_split, check=True):
self.datasetname = name
if isinstance(data, (list, tuple)): # new type of SearchDataset
assert len(data) == 2, 'invalid length: {:}'.format( len(data) )
self.train_data = data[0]
self.valid_data = data[1]
self.train_split = train_split.copy()
self.valid_split = valid_split.copy()
self.mode_str = 'V2' # new mode
else:
self.mode_str = 'V1' # old mode
self.data = data
self.train_split = train_split.copy()
self.valid_split = valid_split.copy()
if check:
intersection = set(train_split).intersection(set(valid_split))
assert len(intersection) == 0, 'the split train and validation sets must have no intersection'
self.length = len(self.train_split)
def __repr__(self):
return ('{name}(name={datasetname}, train={tr_L}, valid={val_L}, version={ver})'.format(name=self.__class__.__name__, datasetname=self.datasetname, tr_L=len(self.train_split), val_L=len(self.valid_split), ver=self.mode_str))
def __len__(self):
return self.length
def __getitem__(self, index):
assert index >= 0 and index < self.length, 'invalid index = {:}'.format(index)
train_index = self.train_split[index]
valid_index = random.choice( self.valid_split )
if self.mode_str == 'V1':
train_image, train_label = self.data[train_index]
valid_image, valid_label = self.data[valid_index]
elif self.mode_str == 'V2':
train_image, train_label = self.train_data[train_index]
valid_image, valid_label = self.valid_data[valid_index]
else: raise ValueError('invalid mode : {:}'.format(self.mode_str))
return train_image, train_label, valid_image, valid_label
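A minimal usage sketch (not part of this commit), wrapping the CIFAR-10 training set with two disjoint index splits in the V1 mode; the data path is a placeholder.
import torchvision.datasets as dset
cifar = dset.CIFAR10('./data', train=True, download=True)
search_data = SearchDataset('cifar10', cifar, list(range(25000)), list(range(25000, 50000)))
train_img, train_label, valid_img, valid_label = search_data[0]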

View File

@@ -0,0 +1,6 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .get_dataset_with_transform import get_datasets, get_nas_search_loaders
from .SearchDatasetWrap import SearchDataset
from .data import get_data

View File

@@ -0,0 +1,69 @@
from datasets import get_datasets
from config_utils import load_config
import torch
import torchvision
class AddGaussianNoise(object):
def __init__(self, mean=0., std=0.001):
self.std = std
self.mean = mean
def __call__(self, tensor):
return tensor + torch.randn(tensor.size()) * self.std + self.mean
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
class RepeatSampler(torch.utils.data.sampler.Sampler):
def __init__(self, samp, repeat):
self.samp = samp
self.repeat = repeat
def __iter__(self):
for i in self.samp:
for j in range(self.repeat):
yield i
def __len__(self):
return self.repeat*len(self.samp)
def get_data(dataset, data_loc, trainval, batch_size, augtype, repeat, args, pin_memory=True):
train_data, valid_data, xshape, class_num = get_datasets(dataset, data_loc, cutout=0)
if augtype == 'gaussnoise':
train_data.transform.transforms = train_data.transform.transforms[2:]
train_data.transform.transforms.append(AddGaussianNoise(std=args.sigma))
elif augtype == 'cutout':
train_data.transform.transforms = train_data.transform.transforms[2:]
train_data.transform.transforms.append(torchvision.transforms.RandomErasing(p=0.9, scale=(0.02, 0.04)))
elif augtype == 'none':
train_data.transform.transforms = train_data.transform.transforms[2:]
if dataset == 'cifar10':
acc_type = 'ori-test'
val_acc_type = 'x-valid'
else:
acc_type = 'x-test'
val_acc_type = 'x-valid'
if trainval and 'cifar10' in dataset:
cifar_split = load_config('config_utils/cifar-split.txt', None, None)
train_split, valid_split = cifar_split.train, cifar_split.valid
if repeat > 0:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
num_workers=0, pin_memory=pin_memory, sampler= RepeatSampler(torch.utils.data.sampler.SubsetRandomSampler(train_split), repeat))
else:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
num_workers=0, pin_memory=pin_memory, sampler= torch.utils.data.sampler.SubsetRandomSampler(train_split))
else:
if repeat > 0:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, #shuffle=True,
num_workers=0, pin_memory=pin_memory, sampler= RepeatSampler(torch.utils.data.sampler.SubsetRandomSampler(range(len(train_data))), repeat))
else:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True,
num_workers=0, pin_memory=pin_memory)
return train_loader
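A minimal sketch (not part of this commit) of driving get_data; the argparse namespace is a hypothetical container, args.sigma is only read for the 'gaussnoise' augmentation, and the data path is a placeholder.
import argparse
extra = argparse.Namespace(sigma=0.001)
train_loader = get_data('cifar10', './data', trainval=False, batch_size=128,
                        augtype='none', repeat=1, args=extra)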

View File

@@ -0,0 +1,255 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os, sys, torch
import os.path as osp
import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as transforms
from copy import deepcopy
from PIL import Image
from .DownsampledImageNet import ImageNet16
from .SearchDatasetWrap import SearchDataset
from config_utils import load_config
Dataset2Class = {'cifar10' : 10,
'cifar100': 100,
'fake':10,
'imagenet-1k-s':1000,
'imagenette2' : 10,
'imagenet-1k' : 1000,
'ImageNet16' : 1000,
'ImageNet16-150': 150,
'ImageNet16-120': 120,
'ImageNet16-200': 200}
class CUTOUT(object):
def __init__(self, length):
self.length = length
def __repr__(self):
return ('{name}(length={length})'.format(name=self.__class__.__name__, **self.__dict__))
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
imagenet_pca = {
'eigval': np.asarray([0.2175, 0.0188, 0.0045]),
'eigvec': np.asarray([
[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203],
])
}
class Lighting(object):
def __init__(self, alphastd,
eigval=imagenet_pca['eigval'],
eigvec=imagenet_pca['eigvec']):
self.alphastd = alphastd
assert eigval.shape == (3,)
assert eigvec.shape == (3, 3)
self.eigval = eigval
self.eigvec = eigvec
def __call__(self, img):
if self.alphastd == 0.:
return img
rnd = np.random.randn(3) * self.alphastd
rnd = rnd.astype('float32')
v = rnd
old_dtype = np.asarray(img).dtype
v = v * self.eigval
v = v.reshape((3, 1))
inc = np.dot(self.eigvec, v).reshape((3,))
img = np.add(img, inc)
if old_dtype == np.uint8:
img = np.clip(img, 0, 255)
img = Image.fromarray(img.astype(old_dtype), 'RGB')
return img
def __repr__(self):
return self.__class__.__name__ + '()'
def get_datasets(name, root, cutout):
if name == 'cifar10':
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
std = [x / 255 for x in [63.0, 62.1, 66.7]]
elif name == 'cifar100':
mean = [x / 255 for x in [129.3, 124.1, 112.4]]
std = [x / 255 for x in [68.2, 65.4, 70.4]]
elif name == 'fake':
mean = [x / 255 for x in [129.3, 124.1, 112.4]]
std = [x / 255 for x in [68.2, 65.4, 70.4]]
elif name.startswith('imagenet-1k'):
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
elif name.startswith('imagenette'):
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
elif name.startswith('ImageNet16'):
mean = [x / 255 for x in [122.68, 116.66, 104.01]]
std = [x / 255 for x in [63.22, 61.26 , 65.09]]
else:
raise TypeError("Unknow dataset : {:}".format(name))
# Data Argumentation
if name == 'cifar10' or name == 'cifar100':
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
if cutout > 0 : lists += [CUTOUT(cutout)]
train_transform = transforms.Compose(lists)
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
xshape = (1, 3, 32, 32)
elif name == 'fake':
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
if cutout > 0 : lists += [CUTOUT(cutout)]
train_transform = transforms.Compose(lists)
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
xshape = (1, 3, 32, 32)
elif name.startswith('ImageNet16'):
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(16, padding=2), transforms.ToTensor(), transforms.Normalize(mean, std)]
if cutout > 0 : lists += [CUTOUT(cutout)]
train_transform = transforms.Compose(lists)
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
xshape = (1, 3, 16, 16)
elif name == 'tiered':
lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(80, padding=4), transforms.ToTensor(), transforms.Normalize(mean, std)]
if cutout > 0 : lists += [CUTOUT(cutout)]
train_transform = transforms.Compose(lists)
test_transform = transforms.Compose([transforms.CenterCrop(80), transforms.ToTensor(), transforms.Normalize(mean, std)])
xshape = (1, 3, 32, 32)
elif name.startswith('imagenette'):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
xlists = []
xlists.append( transforms.ToTensor() )
xlists.append( normalize )
#train_transform = transforms.Compose(xlists)
train_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
test_transform  = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
xshape = (1, 3, 224, 224)
elif name.startswith('imagenet-1k'):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
if name == 'imagenet-1k':
xlists = [transforms.RandomResizedCrop(224)]
xlists.append(
transforms.ColorJitter(
brightness=0.4,
contrast=0.4,
saturation=0.4,
hue=0.2))
xlists.append( Lighting(0.1))
elif name == 'imagenet-1k-s':
xlists = [transforms.RandomResizedCrop(224, scale=(0.2, 1.0))]
else: raise ValueError('invalid name : {:}'.format(name))
xlists.append( transforms.RandomHorizontalFlip(p=0.5) )
xlists.append( transforms.ToTensor() )
xlists.append( normalize )
train_transform = transforms.Compose(xlists)
test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize])
xshape = (1, 3, 224, 224)
else:
raise TypeError("Unknow dataset : {:}".format(name))
if name == 'cifar10':
train_data = dset.CIFAR10 (root, train=True , transform=train_transform, download=True)
test_data = dset.CIFAR10 (root, train=False, transform=test_transform , download=True)
assert len(train_data) == 50000 and len(test_data) == 10000
elif name == 'cifar100':
train_data = dset.CIFAR100(root, train=True , transform=train_transform, download=True)
test_data = dset.CIFAR100(root, train=False, transform=test_transform , download=True)
assert len(train_data) == 50000 and len(test_data) == 10000
elif name == 'fake':
train_data = dset.FakeData(size=50000, image_size=(3, 32, 32), transform=train_transform)
test_data = dset.FakeData(size=10000, image_size=(3, 32, 32), transform=test_transform)
elif name.startswith('imagenette2'):
train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
test_data = dset.ImageFolder(osp.join(root, 'val'), test_transform)
elif name.startswith('imagenet-1k'):
train_data = dset.ImageFolder(osp.join(root, 'train'), train_transform)
test_data = dset.ImageFolder(osp.join(root, 'val'), test_transform)
assert len(train_data) == 1281167 and len(test_data) == 50000, 'invalid number of images : {:} & {:} vs {:} & {:}'.format(len(train_data), len(test_data), 1281167, 50000)
elif name == 'ImageNet16':
train_data = ImageNet16(root, True , train_transform)
test_data = ImageNet16(root, False, test_transform)
assert len(train_data) == 1281167 and len(test_data) == 50000
elif name == 'ImageNet16-120':
train_data = ImageNet16(root, True , train_transform, 120)
test_data = ImageNet16(root, False, test_transform , 120)
assert len(train_data) == 151700 and len(test_data) == 6000
elif name == 'ImageNet16-150':
train_data = ImageNet16(root, True , train_transform, 150)
test_data = ImageNet16(root, False, test_transform , 150)
assert len(train_data) == 190272 and len(test_data) == 7500
elif name == 'ImageNet16-200':
train_data = ImageNet16(root, True , train_transform, 200)
test_data = ImageNet16(root, False, test_transform , 200)
assert len(train_data) == 254775 and len(test_data) == 10000
else: raise TypeError("Unknown dataset : {:}".format(name))
class_num = Dataset2Class[name]
return train_data, test_data, xshape, class_num
def get_nas_search_loaders(train_data, valid_data, dataset, config_root, batch_size, workers):
if isinstance(batch_size, (list,tuple)):
batch, test_batch = batch_size
else:
batch, test_batch = batch_size, batch_size
if dataset == 'cifar10':
#split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
cifar_split = load_config('{:}/cifar-split.txt'.format(config_root), None, None)
train_split, valid_split = cifar_split.train, cifar_split.valid # search over the proposed training and validation set
#logger.log('Load split file from {:}'.format(split_Fpath)) # they are two disjoint groups in the original CIFAR-10 training set
# To split data
xvalid_data = deepcopy(train_data)
if hasattr(xvalid_data, 'transforms'): # to avoid a print issue
xvalid_data.transforms = valid_data.transform
xvalid_data.transform = deepcopy( valid_data.transform )
search_data = SearchDataset(dataset, train_data, train_split, valid_split)
# data loader
search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True , num_workers=workers, pin_memory=True)
train_loader = torch.utils.data.DataLoader(train_data , batch_size=batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split), num_workers=workers, pin_memory=True)
valid_loader = torch.utils.data.DataLoader(xvalid_data, batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split), num_workers=workers, pin_memory=True)
elif dataset == 'cifar100':
cifar100_test_split = load_config('{:}/cifar100-test-split.txt'.format(config_root), None, None)
search_train_data = train_data
search_valid_data = deepcopy(valid_data) ; search_valid_data.transform = train_data.transform
search_data = SearchDataset(dataset, [search_train_data,search_valid_data], list(range(len(search_train_data))), cifar100_test_split.xvalid)
search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True , num_workers=workers, pin_memory=True)
train_loader = torch.utils.data.DataLoader(train_data , batch_size=batch, shuffle=True , num_workers=workers, pin_memory=True)
valid_loader = torch.utils.data.DataLoader(valid_data , batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar100_test_split.xvalid), num_workers=workers, pin_memory=True)
elif dataset == 'ImageNet16-120':
imagenet_test_split = load_config('{:}/imagenet-16-120-test-split.txt'.format(config_root), None, None)
search_train_data = train_data
search_valid_data = deepcopy(valid_data) ; search_valid_data.transform = train_data.transform
search_data = SearchDataset(dataset, [search_train_data,search_valid_data], list(range(len(search_train_data))), imagenet_test_split.xvalid)
search_loader = torch.utils.data.DataLoader(search_data, batch_size=batch, shuffle=True , num_workers=workers, pin_memory=True)
train_loader = torch.utils.data.DataLoader(train_data , batch_size=batch, shuffle=True , num_workers=workers, pin_memory=True)
valid_loader = torch.utils.data.DataLoader(valid_data , batch_size=test_batch, sampler=torch.utils.data.sampler.SubsetRandomSampler(imagenet_test_split.xvalid), num_workers=workers, pin_memory=True)
else:
raise ValueError('invalid dataset : {:}'.format(dataset))
return search_loader, train_loader, valid_loader
#if __name__ == '__main__':
# train_data, test_data, xshape, class_num = dataset = get_datasets('cifar10', '/data02/dongxuanyi/.torch/cifar.python/', -1)
# import pdb; pdb.set_trace()
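A minimal sketch (not part of this commit), mirroring the commented-out test above; both paths are placeholders and the split files under config_root must exist.
train_data, test_data, xshape, class_num = get_datasets('cifar10', './data/cifar.python', 0)
search_loader, train_loader, valid_loader = get_nas_search_loaders(
    train_data, test_data, 'cifar10', './configs/nas-benchmark', (64, 512), 4)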

View File

@@ -0,0 +1 @@
from .point_meta import PointMeta2V, apply_affine2point, apply_boundary

View File

@@ -0,0 +1,116 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import copy, math, torch, numpy as np
from xvision import normalize_points
from xvision import denormalize_points
class PointMeta():
# points : 3 x num_pts (x, y, occlusion)
# image_size: original [width, height]
def __init__(self, num_point, points, box, image_path, dataset_name):
self.num_point = num_point
if box is not None:
assert (isinstance(box, tuple) or isinstance(box, list)) and len(box) == 4
self.box = torch.Tensor(box)
else: self.box = None
if points is None:
self.points = points
else:
assert len(points.shape) == 2 and points.shape[0] == 3 and points.shape[1] == self.num_point, 'The shape of point is not right : {}'.format( points )
self.points = torch.Tensor(points.copy())
self.image_path = image_path
self.datasets = dataset_name
def __repr__(self):
if self.box is None: boxstr = 'None'
else : boxstr = 'box=[{:.1f}, {:.1f}, {:.1f}, {:.1f}]'.format(*self.box.tolist())
return ('{name}(points={num_point}, '.format(name=self.__class__.__name__, **self.__dict__) + boxstr + ')')
def get_box(self, return_diagonal=False):
if self.box is None: return None
if not return_diagonal:
return self.box.clone()
else:
W = (self.box[2]-self.box[0]).item()
H = (self.box[3]-self.box[1]).item()
return math.sqrt(H*H+W*W)
def get_points(self, ignore_indicator=False):
if ignore_indicator: last = 2
else : last = 3
if self.points is not None: return self.points.clone()[:last, :]
else : return torch.zeros((last, self.num_point))
def is_none(self):
#assert self.box is not None, 'The box should not be None'
return self.points is None
#if self.box is None: return True
#else : return self.points is None
def copy(self):
return copy.deepcopy(self)
def visiable_pts_num(self):
with torch.no_grad():
ans = self.points[2,:] > 0
ans = torch.sum(ans)
ans = ans.item()
return ans
def special_fun(self, indicator):
if indicator == '68to49': # For 300W or 300VW, convert the default 68 points to 49 points.
assert self.num_point == 68, 'num-point must be 68 vs. {:}'.format(self.num_point)
self.num_point = 49
out = torch.ones((68), dtype=torch.uint8)
out[[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,60,64]] = 0
if self.points is not None: self.points = self.points.clone()[:, out]
else:
raise ValueError('Invalid indicator : {:}'.format( indicator ))
def apply_horizontal_flip(self):
#self.points[0, :] = width - self.points[0, :] - 1
# Mugsy specific or synthetic
if self.datasets.startswith('HandsyROT'):
ori = np.array(list(range(0, 42)))
pos = np.array(list(range(21,42)) + list(range(0,21)))
self.points[:, pos] = self.points[:, ori]
elif self.datasets.startswith('face68'):
ori = np.array(list(range(0, 68)))
pos = np.array([17,16,15,14,13,12,11,10, 9, 8,7,6,5,4,3,2,1, 27,26,25,24,23,22,21,20,19,18, 28,29,30,31, 36,35,34,33,32, 46,45,44,43,48,47, 40,39,38,37,42,41, 55,54,53,52,51,50,49,60,59,58,57,56,65,64,63,62,61,68,67,66])-1
self.points[:, ori] = self.points[:, pos]
else:
raise ValueError('Does not support {:}'.format(self.datasets))
# shape = (H,W)
def apply_affine2point(points, theta, shape):
assert points.size(0) == 3, 'invalid points shape : {:}'.format(points.size())
with torch.no_grad():
ok_points = points[2,:] == 1
assert torch.sum(ok_points).item() > 0, 'there is no visible point'
points[:2,:] = normalize_points(shape, points[:2,:])
norm_trans_points = ok_points.unsqueeze(0).repeat(3, 1).float()
trans_points, ___ = torch.gesv(points[:, ok_points], theta)
norm_trans_points[:, ok_points] = trans_points
return norm_trans_points
def apply_boundary(norm_trans_points):
with torch.no_grad():
norm_trans_points = norm_trans_points.clone()
oks = torch.stack((norm_trans_points[0]>-1, norm_trans_points[0]<1, norm_trans_points[1]>-1, norm_trans_points[1]<1, norm_trans_points[2]>0))
oks = torch.sum(oks, dim=0) == 5
norm_trans_points[2, :] = oks
return norm_trans_points
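A minimal sketch (not part of this commit) of apply_boundary on randomly generated normalized landmarks, following the 3 x num_pts (x, y, visibility) layout used above.
pts = torch.rand(3, 5) * 2 - 1   # x, y roughly in [-1, 1)
pts[2, :] = 1                    # mark all five points as visible
clamped = apply_boundary(pts)    # row 2 keeps 1 only for points inside the boundary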

View File

@@ -0,0 +1,20 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os
def test_imagenet_data(imagenet):
total_length = len(imagenet)
assert total_length == 1281166 or total_length == 50000, 'The length of ImageNet is wrong : {}'.format(total_length)
map_id = {}
for index in range(total_length):
path, target = imagenet.imgs[index]
folder, image_name = os.path.split(path)
_, folder = os.path.split(folder)
if folder not in map_id:
map_id[folder] = target
else:
assert map_id[folder] == target, 'Class : {} is not {}'.format(folder, target)
assert image_name.find(folder) == 0, '{} is wrong.'.format(path)
print ('Check ImageNet Dataset OK')
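A hypothetical check (not part of this commit), pointing a torchvision ImageFolder at an ImageNet validation directory; the path is a placeholder.
import torchvision.datasets as dset
valid_set = dset.ImageFolder('/path/to/imagenet/val')
test_imagenet_data(valid_set)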

View File

@@ -0,0 +1,105 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet
class Bottleneck(nn.Module):
def __init__(self, nChannels, growthRate):
super(Bottleneck, self).__init__()
interChannels = 4*growthRate
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(interChannels)
self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat((x, out), 1)
return out
class SingleLayer(nn.Module):
def __init__(self, nChannels, growthRate):
super(SingleLayer, self).__init__()
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = torch.cat((x, out), 1)
return out
class Transition(nn.Module):
def __init__(self, nChannels, nOutChannels):
super(Transition, self).__init__()
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = F.avg_pool2d(out, 2)
return out
class DenseNet(nn.Module):
def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
super(DenseNet, self).__init__()
if bottleneck: nDenseBlocks = int( (depth-4) / 6 )
else : nDenseBlocks = int( (depth-4) / 3 )
self.message = 'CifarDenseNet : block : {:}, depth : {:}, reduction : {:}, growth-rate = {:}, class = {:}'.format('bottleneck' if bottleneck else 'basic', depth, reduction, growthRate, nClasses)
nChannels = 2*growthRate
self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)
self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
nOutChannels = int(math.floor(nChannels*reduction))
self.trans1 = Transition(nChannels, nOutChannels)
nChannels = nOutChannels
self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
nOutChannels = int(math.floor(nChannels*reduction))
self.trans2 = Transition(nChannels, nOutChannels)
nChannels = nOutChannels
self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
nChannels += nDenseBlocks*growthRate
self.act = nn.Sequential(
nn.BatchNorm2d(nChannels), nn.ReLU(inplace=True),
nn.AvgPool2d(8))
self.fc = nn.Linear(nChannels, nClasses)
self.apply(initialize_resnet)
def get_message(self):
return self.message
def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
layers = []
for i in range(int(nDenseBlocks)):
if bottleneck:
layers.append(Bottleneck(nChannels, growthRate))
else:
layers.append(SingleLayer(nChannels, growthRate))
nChannels += growthRate
return nn.Sequential(*layers)
def forward(self, inputs):
out = self.conv1( inputs )
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.dense3(out)
features = self.act(out)
features = features.view(features.size(0), -1)
out = self.fc(features)
return features, out
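A minimal sketch (not part of this commit): growthRate=12 and depth=100 with bottleneck blocks is a common DenseNet-BC configuration for CIFAR-10-sized inputs.
net = DenseNet(growthRate=12, depth=100, reduction=0.5, nClasses=10, bottleneck=True)
features, logits = net(torch.randn(2, 3, 32, 32))
print(net.get_message(), logits.shape)   # logits: torch.Size([2, 10])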

View File

@@ -0,0 +1,157 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet
from .SharedUtils import additive_func
class Downsample(nn.Module):
def __init__(self, nIn, nOut, stride):
super(Downsample, self).__init__()
assert stride == 2 and nOut == 2*nIn, 'stride:{} IO:{},{}'.format(stride, nIn, nOut)
self.in_dim = nIn
self.out_dim = nOut
self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=1, padding=0, bias=False)
def forward(self, x):
x = self.avg(x)
out = self.conv(x)
return out
class ConvBNReLU(nn.Module):
def __init__(self, nIn, nOut, kernel, stride, padding, bias, relu):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, bias=bias)
self.bn = nn.BatchNorm2d(nOut)
if relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
self.out_dim = nOut
self.num_conv = 1
def forward(self, x):
conv = self.conv( x )
bn = self.bn( conv )
if self.relu: return self.relu( bn )
else : return bn
class ResNetBasicblock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, False)
if stride == 2:
self.downsample = Downsample(inplanes, planes, stride)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, False)
else:
self.downsample = None
self.out_dim = planes
self.num_conv = 2
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = additive_func(residual, basicblock)
return F.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, False)
if stride == 2:
self.downsample = Downsample(inplanes, planes*self.expansion, stride)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, False)
else:
self.downsample = None
self.out_dim = planes * self.expansion
self.num_conv = 3
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = additive_func(residual, bottleneck)
return F.relu(out, inplace=True)
class CifarResNet(nn.Module):
def __init__(self, block_name, depth, num_classes, zero_init_residual):
super(CifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be one of 164 or 1001'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'CifarResNet : Block : {:}, Depth : {:}, Layers for each block : {:}'.format(block_name, depth, layer_blocks)
self.num_classes = num_classes
self.channels = [16]
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, True) ] )
for stage in range(3):
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(module.out_dim, num_classes)
assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
self.apply(initialize_resnet)
if zero_init_residual:
for m in self.modules():
if isinstance(m, ResNetBasicblock):
nn.init.constant_(m.conv_b.bn.weight, 0)
elif isinstance(m, ResNetBottleneck):
nn.init.constant_(m.conv_1x4.bn.weight, 0)
def get_message(self):
return self.message
def forward(self, inputs):
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits
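A quick way to check the depth bookkeeping above (the assert that sums num_conv) is to push a dummy batch through a small instance; a minimal sketch, assuming the module is importable as models.CifarResNet (adjust to the actual package layout):

import torch
from models.CifarResNet import CifarResNet   # hypothetical import path

net = CifarResNet('ResNetBasicblock', 20, num_classes=10, zero_init_residual=False)
features, logits = net(torch.randn(2, 3, 32, 32))
print(features.shape, logits.shape)   # expected: torch.Size([2, 64]) torch.Size([2, 10])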

View File

@@ -0,0 +1,94 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from .initialization import initialize_resnet
class WideBasicblock(nn.Module):
def __init__(self, inplanes, planes, stride, dropout=False):
super(WideBasicblock, self).__init__()
self.bn_a = nn.BatchNorm2d(inplanes)
self.conv_a = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn_b = nn.BatchNorm2d(planes)
if dropout:
self.dropout = nn.Dropout2d(p=0.5, inplace=True)
else:
self.dropout = None
self.conv_b = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
if inplanes != planes:
self.downsample = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, padding=0, bias=False)
else:
self.downsample = None
def forward(self, x):
basicblock = self.bn_a(x)
basicblock = F.relu(basicblock)
basicblock = self.conv_a(basicblock)
basicblock = self.bn_b(basicblock)
basicblock = F.relu(basicblock)
if self.dropout is not None:
basicblock = self.dropout(basicblock)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
x = self.downsample(x)
return x + basicblock
class CifarWideResNet(nn.Module):
"""
Wide ResNet optimized for the CIFAR datasets, as specified in
https://arxiv.org/abs/1605.07146
"""
def __init__(self, depth, widen_factor, num_classes, dropout):
super(CifarWideResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
assert (depth - 4) % 6 == 0, 'depth should be one of 16, 22, 28, 40'
layer_blocks = (depth - 4) // 6
print ('CifarWideResNet : Depth : {} , Layers for each block : {}'.format(depth, layer_blocks))
self.num_classes = num_classes
self.dropout = dropout
self.conv_3x3 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
self.message = 'Wide ResNet : depth={:}, widen_factor={:}, class={:}'.format(depth, widen_factor, num_classes)
self.inplanes = 16
self.stage_1 = self._make_layer(WideBasicblock, 16*widen_factor, layer_blocks, 1)
self.stage_2 = self._make_layer(WideBasicblock, 32*widen_factor, layer_blocks, 2)
self.stage_3 = self._make_layer(WideBasicblock, 64*widen_factor, layer_blocks, 2)
self.lastact = nn.Sequential(nn.BatchNorm2d(64*widen_factor), nn.ReLU(inplace=True))
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(64*widen_factor, num_classes)
self.apply(initialize_resnet)
def get_message(self):
return self.message
def _make_layer(self, block, planes, blocks, stride):
layers = []
layers.append(block(self.inplanes, planes, stride, self.dropout))
self.inplanes = planes
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, 1, self.dropout))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv_3x3(x)
x = self.stage_1(x)
x = self.stage_2(x)
x = self.stage_3(x)
x = self.lastact(x)
x = self.avgpool(x)
features = x.view(x.size(0), -1)
outs = self.classifier(features)
return features, outs

View File

@@ -0,0 +1,101 @@
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
from torch import nn
from .initialization import initialize_resnet
class ConvBNReLU(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
super(ConvBNReLU, self).__init__()
padding = (kernel_size - 1) // 2
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False)
self.bn = nn.BatchNorm2d(out_planes)
self.relu = nn.ReLU6(inplace=True)
def forward(self, x):
out = self.conv( x )
out = self.bn ( out )
out = self.relu( out )
return out
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, num_classes, width_mult, input_channel, last_channel, block_name, dropout):
super(MobileNetV2, self).__init__()
if block_name == 'InvertedResidual':
block = InvertedResidual
else:
raise ValueError('invalid block name : {:}'.format(block_name))
inverted_residual_setting = [
# t, c, n, s
[1, 16 , 1, 1],
[6, 24 , 2, 2],
[6, 32 , 3, 2],
[6, 64 , 4, 2],
[6, 96 , 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# building first layer
input_channel = int(input_channel * width_mult)
self.last_channel = int(last_channel * max(1.0, width_mult))
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(self.last_channel, num_classes),
)
self.message = 'MobileNetV2 : width_mult={:}, in-C={:}, last-C={:}, block={:}, dropout={:}'.format(width_mult, input_channel, last_channel, block_name, dropout)
# weight initialization
self.apply( initialize_resnet )
def get_message(self):
return self.message
def forward(self, inputs):
features = self.features(inputs)
vectors = features.mean([2, 3])
predicts = self.classifier(vectors)
return features, predicts
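The (t, c, n, s) rows follow the standard MobileNetV2 schedule, and width_mult simply rescales the per-stage channels c (for example, width_mult=0.5 turns the 32-channel stage into 16 channels). A rough instantiation sketch, assuming a hypothetical import path:

import torch
from models.ImageNet_MobileNetV2 import MobileNetV2   # hypothetical import path

net = MobileNetV2(num_classes=1000, width_mult=1.0, input_channel=32,
                  last_channel=1280, block_name='InvertedResidual', dropout=0.2)
features, predicts = net(torch.randn(2, 3, 224, 224))
print(features.shape, predicts.shape)   # torch.Size([2, 1280, 7, 7]) torch.Size([2, 1000])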

View File

@@ -0,0 +1,172 @@
# Deep Residual Learning for Image Recognition, CVPR 2016
import torch.nn as nn
from .initialization import initialize_resnet
def conv3x3(in_planes, out_planes, stride=1, groups=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
def conv1x1(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
super(BasicBlock, self).__init__()
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64):
super(Bottleneck, self).__init__()
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = conv3x3(width, width, stride, groups)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block_name, layers, deep_stem, num_classes, zero_init_residual, groups, width_per_group):
super(ResNet, self).__init__()
#planes = [int(width_per_group * groups * 2 ** i) for i in range(4)]
if block_name == 'BasicBlock' : block= BasicBlock
elif block_name == 'Bottleneck': block= Bottleneck
else : raise ValueError('invalid block-name : {:}'.format(block_name))
if not deep_stem:
self.conv = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
nn.BatchNorm2d(64), nn.ReLU(inplace=True))
else:
self.conv = nn.Sequential(
nn.Conv2d( 3, 32, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(32), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32), nn.ReLU(inplace=True),
nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(64), nn.ReLU(inplace=True))
self.inplanes = 64
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64 , layers[0], stride=1, groups=groups, base_width=width_per_group)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, groups=groups, base_width=width_per_group)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, groups=groups, base_width=width_per_group)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, groups=groups, base_width=width_per_group)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
self.message = 'block = {:}, layers = {:}, deep_stem = {:}, num_classes = {:}'.format(block, layers, deep_stem, num_classes)
self.apply( initialize_resnet )
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride, groups, base_width):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if stride == 2:
downsample = nn.Sequential(
nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
conv1x1(self.inplanes, planes * block.expansion, 1),
nn.BatchNorm2d(planes * block.expansion),
)
elif stride == 1:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
nn.BatchNorm2d(planes * block.expansion),
)
else: raise ValueError('invalid stride [{:}] for downsample'.format(stride))
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, groups, base_width))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, 1, None, groups, base_width))
return nn.Sequential(*layers)
def get_message(self):
return self.message
def forward(self, x):
x = self.conv(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.fc(features)
return features, logits
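For reference, the usual depths map onto the layers argument as [2, 2, 2, 2] with BasicBlock for ResNet-18 and [3, 4, 6, 3] with Bottleneck for ResNet-50; a minimal sketch, assuming a hypothetical import path:

import torch
from models.ImageNet_ResNet import ResNet   # hypothetical import path

net = ResNet('BasicBlock', [2, 2, 2, 2], deep_stem=False, num_classes=1000,
             zero_init_residual=True, groups=1, width_per_group=64)
features, logits = net(torch.randn(2, 3, 224, 224))
print(features.shape, logits.shape)   # torch.Size([2, 512]) torch.Size([2, 1000])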

View File

@@ -0,0 +1,34 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch
import torch.nn as nn
def additive_func(A, B):
assert A.dim() == B.dim() and A.size(0) == B.size(0), '{:} vs {:}'.format(A.size(), B.size())
C = min(A.size(1), B.size(1))
if A.size(1) == B.size(1):
return A + B
elif A.size(1) < B.size(1):
out = B.clone()
out[:,:C] += A
return out
else:
out = A.clone()
out[:,:C] += B
return out
def change_key(key, value):
def func(m):
if hasattr(m, key):
setattr(m, key, value)
return func
def parse_channel_info(xstring):
blocks = xstring.split(' ')
blocks = [x.split('-') for x in blocks]
blocks = [[int(_) for _ in x] for x in blocks]
return blocks
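These helpers are easiest to read from their input/output behaviour; a small illustration, assuming a hypothetical import path:

import torch
from models.SharedUtils import additive_func, parse_channel_info   # hypothetical import path

# parse_channel_info turns a space-separated channel spec into nested ints
print(parse_channel_info('3-16 16-32 32-64'))    # [[3, 16], [16, 32], [32, 64]]

# additive_func adds two tensors whose channel counts may differ, keeping the larger one
A, B = torch.ones(2, 3, 4, 4), torch.ones(2, 5, 4, 4)
print(additive_func(A, B).shape)                 # torch.Size([2, 5, 4, 4])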

View File

@@ -0,0 +1,185 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from os import path as osp
from typing import List, Text
import torch
__all__ = ['change_key', 'get_cell_based_tiny_net', 'get_search_spaces', 'get_cifar_models', 'get_imagenet_models', \
'obtain_model', 'obtain_search_model', 'load_net_from_checkpoint', \
'CellStructure', 'CellArchitectures'
]
# useful modules
from config_utils import dict2config
from .SharedUtils import change_key
from .cell_searchs import CellStructure, CellArchitectures
# Cell-based NAS Models
def get_cell_based_tiny_net(config):
if isinstance(config, dict): config = dict2config(config, None) # to support the argument being a dict
super_type = getattr(config, 'super_type', 'basic')
group_names = ['DARTS-V1', 'DARTS-V2', 'GDAS', 'SETN', 'ENAS', 'RANDOM']
if super_type == 'basic' and config.name in group_names:
from .cell_searchs import nas201_super_nets as nas_super_nets
try:
return nas_super_nets[config.name](config.C, config.N, config.max_nodes, config.num_classes, config.space, config.affine, config.track_running_stats)
except:
return nas_super_nets[config.name](config.C, config.N, config.max_nodes, config.num_classes, config.space)
elif super_type == 'nasnet-super':
from .cell_searchs import nasnet_super_nets as nas_super_nets
return nas_super_nets[config.name](config.C, config.N, config.steps, config.multiplier, \
config.stem_multiplier, config.num_classes, config.space, config.affine, config.track_running_stats)
elif config.name == 'infer.tiny':
from .cell_infers import TinyNetwork
if hasattr(config, 'genotype'):
genotype = config.genotype
elif hasattr(config, 'arch_str'):
genotype = CellStructure.str2structure(config.arch_str)
else: raise ValueError('Can not find genotype from this config : {:}'.format(config))
return TinyNetwork(config.C, config.N, genotype, config.num_classes)
elif config.name == 'infer.shape.tiny':
from .shape_infers import DynamicShapeTinyNet
if isinstance(config.channels, str):
channels = tuple([int(x) for x in config.channels.split(':')])
else: channels = config.channels
genotype = CellStructure.str2structure(config.genotype)
return DynamicShapeTinyNet(channels, genotype, config.num_classes)
elif config.name == 'infer.nasnet-cifar':
from .cell_infers import NASNetonCIFAR
raise NotImplementedError
else:
raise ValueError('invalid network name : {:}'.format(config.name))
# obtain the search space, i.e., a list of the operation names available in the given space
def get_search_spaces(xtype, name) -> List[Text]:
if xtype == 'cell':
from .cell_operations import SearchSpaceNames
assert name in SearchSpaceNames, 'invalid name [{:}] in {:}'.format(name, SearchSpaceNames.keys())
return SearchSpaceNames[name]
else:
raise ValueError('invalid search-space type is {:}'.format(xtype))
def get_cifar_models(config, extra_path=None):
super_type = getattr(config, 'super_type', 'basic')
if super_type == 'basic':
from .CifarResNet import CifarResNet
from .CifarDenseNet import DenseNet
from .CifarWideResNet import CifarWideResNet
if config.arch == 'resnet':
return CifarResNet(config.module, config.depth, config.class_num, config.zero_init_residual)
elif config.arch == 'densenet':
return DenseNet(config.growthRate, config.depth, config.reduction, config.class_num, config.bottleneck)
elif config.arch == 'wideresnet':
return CifarWideResNet(config.depth, config.wide_factor, config.class_num, config.dropout)
else:
raise ValueError('invalid module type : {:}'.format(config.arch))
elif super_type.startswith('infer'):
from .shape_infers import InferWidthCifarResNet
from .shape_infers import InferDepthCifarResNet
from .shape_infers import InferCifarResNet
from .cell_infers import NASNetonCIFAR
assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
infer_mode = super_type.split('-')[1]
if infer_mode == 'width':
return InferWidthCifarResNet(config.module, config.depth, config.xchannels, config.class_num, config.zero_init_residual)
elif infer_mode == 'depth':
return InferDepthCifarResNet(config.module, config.depth, config.xblocks, config.class_num, config.zero_init_residual)
elif infer_mode == 'shape':
return InferCifarResNet(config.module, config.depth, config.xblocks, config.xchannels, config.class_num, config.zero_init_residual)
elif infer_mode == 'nasnet.cifar':
genotype = config.genotype
if extra_path is not None: # reload genotype by extra_path
if not osp.isfile(extra_path): raise ValueError('invalid extra_path : {:}'.format(extra_path))
xdata = torch.load(extra_path)
current_epoch = xdata['epoch']
genotype = xdata['genotypes'][current_epoch-1]
C = config.C if hasattr(config, 'C') else config.ichannel
N = config.N if hasattr(config, 'N') else config.layers
return NASNetonCIFAR(C, N, config.stem_multi, config.class_num, genotype, config.auxiliary)
else:
raise ValueError('invalid infer-mode : {:}'.format(infer_mode))
else:
raise ValueError('invalid super-type : {:}'.format(super_type))
def get_imagenet_models(config):
super_type = getattr(config, 'super_type', 'basic')
if super_type == 'basic':
from .ImageNet_ResNet import ResNet
from .ImageNet_MobileNetV2 import MobileNetV2
if config.arch == 'resnet':
return ResNet(config.block_name, config.layers, config.deep_stem, config.class_num, config.zero_init_residual, config.groups, config.width_per_group)
elif config.arch == 'mobilenet_v2':
return MobileNetV2(config.class_num, config.width_multi, config.input_channel, config.last_channel, 'InvertedResidual', config.dropout)
else:
raise ValueError('invalid arch : {:}'.format( config.arch ))
elif super_type.startswith('infer'): # NAS searched architecture
assert len(super_type.split('-')) == 2, 'invalid super_type : {:}'.format(super_type)
infer_mode = super_type.split('-')[1]
if infer_mode == 'shape':
from .shape_infers import InferImagenetResNet
from .shape_infers import InferMobileNetV2
if config.arch == 'resnet':
return InferImagenetResNet(config.block_name, config.layers, config.xblocks, config.xchannels, config.deep_stem, config.class_num, config.zero_init_residual)
elif config.arch == "MobileNetV2":
return InferMobileNetV2(config.class_num, config.xchannels, config.xblocks, config.dropout)
else:
raise ValueError('invalid arch-mode : {:}'.format(config.arch))
else:
raise ValueError('invalid infer-mode : {:}'.format(infer_mode))
else:
raise ValueError('invalid super-type : {:}'.format(super_type))
# Try to obtain the network by config.
def obtain_model(config, extra_path=None):
if config.dataset == 'cifar':
return get_cifar_models(config, extra_path)
elif config.dataset == 'imagenet':
return get_imagenet_models(config)
else:
raise ValueError('invalid dataset in the model config : {:}'.format(config))
def obtain_search_model(config):
if config.dataset == 'cifar':
if config.arch == 'resnet':
from .shape_searchs import SearchWidthCifarResNet
from .shape_searchs import SearchDepthCifarResNet
from .shape_searchs import SearchShapeCifarResNet
if config.search_mode == 'width':
return SearchWidthCifarResNet(config.module, config.depth, config.class_num)
elif config.search_mode == 'depth':
return SearchDepthCifarResNet(config.module, config.depth, config.class_num)
elif config.search_mode == 'shape':
return SearchShapeCifarResNet(config.module, config.depth, config.class_num)
else: raise ValueError('invalid search mode : {:}'.format(config.search_mode))
elif config.arch == 'simres':
from .shape_searchs import SearchWidthSimResNet
if config.search_mode == 'width':
return SearchWidthSimResNet(config.depth, config.class_num)
else: raise ValueError('invalid search mode : {:}'.format(config.search_mode))
else:
raise ValueError('invalid arch : {:} for dataset [{:}]'.format(config.arch, config.dataset))
elif config.dataset == 'imagenet':
from .shape_searchs import SearchShapeImagenetResNet
assert config.search_mode == 'shape', 'invalid search-mode : {:}'.format( config.search_mode )
if config.arch == 'resnet':
return SearchShapeImagenetResNet(config.block_name, config.layers, config.deep_stem, config.class_num)
else:
raise ValueError('invalid model config : {:}'.format(config))
else:
raise ValueError('invalid dataset in the model config : {:}'.format(config))
def load_net_from_checkpoint(checkpoint):
assert osp.isfile(checkpoint), 'checkpoint {:} does not exist'.format(checkpoint)
checkpoint = torch.load(checkpoint)
model_config = dict2config(checkpoint['model-config'], None)
model = obtain_model(model_config)
model.load_state_dict(checkpoint['base-model'])
return model
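get_cell_based_tiny_net accepts either a plain dict or a dict2config namespace; the sketch below exercises the 'infer.tiny' branch with an arbitrary NAS-Bench-201 architecture string, assuming the package is importable as models:

from models import get_cell_based_tiny_net   # hypothetical import path

config = {'name': 'infer.tiny', 'C': 16, 'N': 5, 'num_classes': 10,
          'arch_str': '|nor_conv_3x3~0|+|nor_conv_3x3~0|skip_connect~1|+|skip_connect~0|skip_connect~1|nor_conv_3x3~2|'}
net = get_cell_based_tiny_net(config)   # builds a TinyNetwork from the decoded genotype
print(net.get_message())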

View File

@@ -0,0 +1,5 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
from .tiny_network import TinyNetwork
from .nasnet_cifar import NASNetonCIFAR

View File

@@ -0,0 +1,120 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import OPS
# Cell for NAS-Bench-201
class InferCell(nn.Module):
def __init__(self, genotype, C_in, C_out, stride):
super(InferCell, self).__init__()
self.layers = nn.ModuleList()
self.node_IN = []
self.node_IX = []
self.genotype = deepcopy(genotype)
for i in range(1, len(genotype)):
node_info = genotype[i-1]
cur_index = []
cur_innod = []
for (op_name, op_in) in node_info:
if op_in == 0:
layer = OPS[op_name](C_in , C_out, stride, True, True)
else:
layer = OPS[op_name](C_out, C_out, 1, True, True)
cur_index.append( len(self.layers) )
cur_innod.append( op_in )
self.layers.append( layer )
self.node_IX.append( cur_index )
self.node_IN.append( cur_innod )
self.nodes = len(genotype)
self.in_dim = C_in
self.out_dim = C_out
def extra_repr(self):
string = 'info :: nodes={nodes}, inC={in_dim}, outC={out_dim}'.format(**self.__dict__)
laystr = []
for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)):
y = ['I{:}-L{:}'.format(_ii, _il) for _il, _ii in zip(node_layers, node_innods)]
x = '{:}<-({:})'.format(i+1, ','.join(y))
laystr.append( x )
return string + ', [{:}]'.format( ' | '.join(laystr) ) + ', {:}'.format(self.genotype.tostr())
def forward(self, inputs):
nodes = [inputs]
for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)):
node_feature = sum( self.layers[_il](nodes[_ii]) for _il, _ii in zip(node_layers, node_innods) )
nodes.append( node_feature )
return nodes[-1]
# Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018
class NASNetInferCell(nn.Module):
def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, affine, track_running_stats):
super(NASNetInferCell, self).__init__()
self.reduction = reduction
if reduction_prev: self.preprocess0 = OPS['skip_connect'](C_prev_prev, C, 2, affine, track_running_stats)
else : self.preprocess0 = OPS['nor_conv_1x1'](C_prev_prev, C, 1, affine, track_running_stats)
self.preprocess1 = OPS['nor_conv_1x1'](C_prev, C, 1, affine, track_running_stats)
if not reduction:
nodes, concats = genotype['normal'], genotype['normal_concat']
else:
nodes, concats = genotype['reduce'], genotype['reduce_concat']
self._multiplier = len(concats)
self._concats = concats
self._steps = len(nodes)
self._nodes = nodes
self.edges = nn.ModuleDict()
for i, node in enumerate(nodes):
for in_node in node:
name, j = in_node[0], in_node[1]
stride = 2 if reduction and j < 2 else 1
node_str = '{:}<-{:}'.format(i+2, j)
self.edges[node_str] = OPS[name](C, C, stride, affine, track_running_stats)
# [TODO] to support drop_prob in this function..
def forward(self, s0, s1, unused_drop_prob):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)
states = [s0, s1]
for i, node in enumerate(self._nodes):
clist = []
for in_node in node:
name, j = in_node[0], in_node[1]
node_str = '{:}<-{:}'.format(i+2, j)
op = self.edges[ node_str ]
clist.append( op(states[j]) )
states.append( sum(clist) )
return torch.cat([states[x] for x in self._concats], dim=1)
class AuxiliaryHeadCIFAR(nn.Module):
def __init__(self, C, num_classes):
"""assuming input size 8x8"""
super(AuxiliaryHeadCIFAR, self).__init__()
self.features = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
nn.Conv2d(C, 128, 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, 2, bias=False),
nn.BatchNorm2d(768),
nn.ReLU(inplace=True)
)
self.classifier = nn.Linear(768, num_classes)
def forward(self, x):
x = self.features(x)
x = self.classifier(x.view(x.size(0),-1))
return x

View File

@@ -0,0 +1,71 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch
import torch.nn as nn
from copy import deepcopy
from .cells import NASNetInferCell as InferCell, AuxiliaryHeadCIFAR
# The macro structure is based on NASNet
class NASNetonCIFAR(nn.Module):
def __init__(self, C, N, stem_multiplier, num_classes, genotype, auxiliary, affine=True, track_running_stats=True):
super(NASNetonCIFAR, self).__init__()
self._C = C
self._layerN = N
self.stem = nn.Sequential(
nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C*stem_multiplier))
# config for each layer
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1)
layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1)
C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False
self.auxiliary_index = None
self.auxiliary_head = None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
cell = InferCell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats)
self.cells.append( cell )
C_prev_prev, C_prev, reduction_prev = C_prev, cell._multiplier*C_curr, reduction
if reduction and C_curr == C*4 and auxiliary:
self.auxiliary_head = AuxiliaryHeadCIFAR(C_prev, num_classes)
self.auxiliary_index = index
self._Layer = len(self.cells)
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.drop_path_prob = -1
def update_drop_path(self, drop_path_prob):
self.drop_path_prob = drop_path_prob
def auxiliary_param(self):
if self.auxiliary_head is None: return []
else: return list( self.auxiliary_head.parameters() )
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def forward(self, inputs):
stem_feature, logits_aux = self.stem(inputs), None
cell_results = [stem_feature, stem_feature]
for i, cell in enumerate(self.cells):
cell_feature = cell(cell_results[-2], cell_results[-1], self.drop_path_prob)
cell_results.append( cell_feature )
if self.auxiliary_index is not None and i == self.auxiliary_index and self.training:
logits_aux = self.auxiliary_head( cell_results[-1] )
out = self.lastact(cell_results[-1])
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
if logits_aux is None: return out, logits
else: return out, [logits, logits_aux]

View File

@@ -0,0 +1,58 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch.nn as nn
from ..cell_operations import ResNetBasicblock
from .cells import InferCell
# The macro structure for architectures in NAS-Bench-201
class TinyNetwork(nn.Module):
def __init__(self, C, N, genotype, num_classes):
super(TinyNetwork, self).__init__()
self._C = C
self._layerN = N
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev = C
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2, True)
else:
cell = InferCell(genotype, C_prev, C_curr, 1)
self.cells.append( cell )
C_prev = cell.out_dim
self._Layer= len(self.cells)
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def forward(self, inputs):
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return logits, out

View File

@@ -0,0 +1,297 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import torch
import torch.nn as nn
__all__ = ['OPS', 'ResNetBasicblock', 'SearchSpaceNames']
OPS = {
'none' : lambda C_in, C_out, stride, affine, track_running_stats: Zero(C_in, C_out, stride),
'avg_pool_3x3' : lambda C_in, C_out, stride, affine, track_running_stats: POOLING(C_in, C_out, stride, 'avg', affine, track_running_stats),
'max_pool_3x3' : lambda C_in, C_out, stride, affine, track_running_stats: POOLING(C_in, C_out, stride, 'max', affine, track_running_stats),
'nor_conv_7x7' : lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(C_in, C_out, (7,7), (stride,stride), (3,3), (1,1), affine, track_running_stats),
'nor_conv_3x3' : lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(C_in, C_out, (3,3), (stride,stride), (1,1), (1,1), affine, track_running_stats),
'nor_conv_1x1' : lambda C_in, C_out, stride, affine, track_running_stats: ReLUConvBN(C_in, C_out, (1,1), (stride,stride), (0,0), (1,1), affine, track_running_stats),
'dua_sepc_3x3' : lambda C_in, C_out, stride, affine, track_running_stats: DualSepConv(C_in, C_out, (3,3), (stride,stride), (1,1), (1,1), affine, track_running_stats),
'dua_sepc_5x5' : lambda C_in, C_out, stride, affine, track_running_stats: DualSepConv(C_in, C_out, (5,5), (stride,stride), (2,2), (1,1), affine, track_running_stats),
'dil_sepc_3x3' : lambda C_in, C_out, stride, affine, track_running_stats: SepConv(C_in, C_out, (3,3), (stride,stride), (2,2), (2,2), affine, track_running_stats),
'dil_sepc_5x5' : lambda C_in, C_out, stride, affine, track_running_stats: SepConv(C_in, C_out, (5,5), (stride,stride), (4,4), (2,2), affine, track_running_stats),
'skip_connect' : lambda C_in, C_out, stride, affine, track_running_stats: Identity() if stride == 1 and C_in == C_out else FactorizedReduce(C_in, C_out, stride, affine, track_running_stats),
}
CONNECT_NAS_BENCHMARK = ['none', 'skip_connect', 'nor_conv_3x3']
NAS_BENCH_201 = ['none', 'skip_connect', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3']
DARTS_SPACE = ['none', 'skip_connect', 'dua_sepc_3x3', 'dua_sepc_5x5', 'dil_sepc_3x3', 'dil_sepc_5x5', 'avg_pool_3x3', 'max_pool_3x3']
SearchSpaceNames = {'connect-nas' : CONNECT_NAS_BENCHMARK,
'nas-bench-201': NAS_BENCH_201,
'darts' : DARTS_SPACE}
class ReLUConvBN(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine, track_running_stats=True):
super(ReLUConvBN, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False),
nn.BatchNorm2d(C_out, affine=affine, track_running_stats=track_running_stats)
)
def forward(self, x):
return self.op(x)
class SepConv(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine, track_running_stats=True):
super(SepConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine, track_running_stats=track_running_stats),
)
def forward(self, x):
return self.op(x)
class DualSepConv(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine, track_running_stats=True):
super(DualSepConv, self).__init__()
self.op_a = SepConv(C_in, C_in , kernel_size, stride, padding, dilation, affine, track_running_stats)
self.op_b = SepConv(C_in, C_out, kernel_size, 1, padding, dilation, affine, track_running_stats)
def forward(self, x):
x = self.op_a(x)
x = self.op_b(x)
return x
class ResNetBasicblock(nn.Module):
def __init__(self, inplanes, planes, stride, affine=True):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ReLUConvBN(inplanes, planes, 3, stride, 1, 1, affine)
self.conv_b = ReLUConvBN( planes, planes, 3, 1, 1, 1, affine)
if stride == 2:
self.downsample = nn.Sequential(
nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False))
elif inplanes != planes:
self.downsample = ReLUConvBN(inplanes, planes, 1, 1, 0, 1, affine)
else:
self.downsample = None
self.in_dim = inplanes
self.out_dim = planes
self.stride = stride
self.num_conv = 2
def extra_repr(self):
string = '{name}(inC={in_dim}, outC={out_dim}, stride={stride})'.format(name=self.__class__.__name__, **self.__dict__)
return string
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
return residual + basicblock
class POOLING(nn.Module):
def __init__(self, C_in, C_out, stride, mode, affine=True, track_running_stats=True):
super(POOLING, self).__init__()
if C_in == C_out:
self.preprocess = None
else:
self.preprocess = ReLUConvBN(C_in, C_out, 1, 1, 0, 1, affine, track_running_stats)
if mode == 'avg' : self.op = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False)
elif mode == 'max': self.op = nn.MaxPool2d(3, stride=stride, padding=1)
else : raise ValueError('Invalid mode={:} in POOLING'.format(mode))
def forward(self, inputs):
if self.preprocess: x = self.preprocess(inputs)
else : x = inputs
return self.op(x)
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
class Zero(nn.Module):
def __init__(self, C_in, C_out, stride):
super(Zero, self).__init__()
self.C_in = C_in
self.C_out = C_out
self.stride = stride
self.is_zero = True
def forward(self, x):
if self.C_in == self.C_out:
if self.stride == 1: return x.mul(0.)
else : return x[:,:,::self.stride,::self.stride].mul(0.)
else:
shape = list(x.shape)
shape[1] = self.C_out
zeros = x.new_zeros(shape, dtype=x.dtype, device=x.device)
return zeros
def extra_repr(self):
return 'C_in={C_in}, C_out={C_out}, stride={stride}'.format(**self.__dict__)
class FactorizedReduce(nn.Module):
def __init__(self, C_in, C_out, stride, affine, track_running_stats):
super(FactorizedReduce, self).__init__()
self.stride = stride
self.C_in = C_in
self.C_out = C_out
self.relu = nn.ReLU(inplace=False)
if stride == 2:
#assert C_out % 2 == 0, 'C_out : {:}'.format(C_out)
C_outs = [C_out // 2, C_out - C_out // 2]
self.convs = nn.ModuleList()
for i in range(2):
self.convs.append( nn.Conv2d(C_in, C_outs[i], 1, stride=stride, padding=0, bias=False) )
self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
elif stride == 1:
self.conv = nn.Conv2d(C_in, C_out, 1, stride=stride, padding=0, bias=False)
else:
raise ValueError('Invalid stride : {:}'.format(stride))
self.bn = nn.BatchNorm2d(C_out, affine=affine, track_running_stats=track_running_stats)
def forward(self, x):
if self.stride == 2:
x = self.relu(x)
y = self.pad(x)
out = torch.cat([self.convs[0](x), self.convs[1](y[:,:,1:,1:])], dim=1)
else:
out = self.conv(x)
out = self.bn(out)
return out
def extra_repr(self):
return 'C_in={C_in}, C_out={C_out}, stride={stride}'.format(**self.__dict__)
# Auto-ReID: Searching for a Part-Aware ConvNet for Person Re-Identification, ICCV 2019
class PartAwareOp(nn.Module):
def __init__(self, C_in, C_out, stride, part=4):
super().__init__()
self.part = part
self.hidden = C_in // 3
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.local_conv_list = nn.ModuleList()
for i in range(self.part):
self.local_conv_list.append(
nn.Sequential(nn.ReLU(), nn.Conv2d(C_in, self.hidden, 1), nn.BatchNorm2d(self.hidden, affine=True))
)
self.W_K = nn.Linear(self.hidden, self.hidden)
self.W_Q = nn.Linear(self.hidden, self.hidden)
if stride == 2 : self.last = FactorizedReduce(C_in + self.hidden, C_out, 2, affine=True, track_running_stats=True)
elif stride == 1: self.last = FactorizedReduce(C_in + self.hidden, C_out, 1, affine=True, track_running_stats=True)
else: raise ValueError('Invalid Stride : {:}'.format(stride))
def forward(self, x):
batch, C, H, W = x.size()
assert H >= self.part, 'input size too small : {:} vs {:}'.format(x.shape, self.part)
IHs = [0]
for i in range(self.part): IHs.append( min(H, int((i+1)*(float(H)/self.part))) )
local_feat_list = []
for i in range(self.part):
feature = x[:, :, IHs[i]:IHs[i+1], :]
xfeax = self.avg_pool(feature)
xfea = self.local_conv_list[i]( xfeax )
local_feat_list.append( xfea )
part_feature = torch.cat(local_feat_list, dim=2).view(batch, -1, self.part)
part_feature = part_feature.transpose(1,2).contiguous()
part_K = self.W_K(part_feature)
part_Q = self.W_Q(part_feature).transpose(1,2).contiguous()
weight_att = torch.bmm(part_K, part_Q)
attention = torch.softmax(weight_att, dim=2)
aggreateF = torch.bmm(attention, part_feature).transpose(1,2).contiguous()
features = []
for i in range(self.part):
feature = aggreateF[:, :, i:i+1].expand(batch, self.hidden, IHs[i+1]-IHs[i])
feature = feature.view(batch, self.hidden, IHs[i+1]-IHs[i], 1)
features.append( feature )
features = torch.cat(features, dim=2).expand(batch, self.hidden, H, W)
final_fea = torch.cat((x,features), dim=1)
outputs = self.last( final_fea )
return outputs
# Searching for A Robust Neural Architecture in Four GPU Hours
class GDAS_Reduction_Cell(nn.Module):
def __init__(self, C_prev_prev, C_prev, C, reduction_prev, multiplier, affine, track_running_stats):
super(GDAS_Reduction_Cell, self).__init__()
if reduction_prev:
self.preprocess0 = FactorizedReduce(C_prev_prev, C, 2, affine, track_running_stats)
else:
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, 1, affine, track_running_stats)
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, 1, affine, track_running_stats)
self.multiplier = multiplier
self.reduction = True
self.ops1 = nn.ModuleList(
[nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
nn.BatchNorm2d(C, affine=True),
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(C, affine=True)),
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, (1, 3), stride=(1, 2), padding=(0, 1), groups=8, bias=False),
nn.Conv2d(C, C, (3, 1), stride=(2, 1), padding=(1, 0), groups=8, bias=False),
nn.BatchNorm2d(C, affine=True),
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(C, affine=True))])
self.ops2 = nn.ModuleList(
[nn.Sequential(
nn.MaxPool2d(3, stride=1, padding=1),
nn.BatchNorm2d(C, affine=True)),
nn.Sequential(
nn.MaxPool2d(3, stride=2, padding=1),
nn.BatchNorm2d(C, affine=True))])
def forward(self, s0, s1, drop_prob = -1):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)
X0 = self.ops1[0] (s0)
X1 = self.ops1[1] (s1)
if self.training and drop_prob > 0.:
X0, X1 = drop_path(X0, drop_prob), drop_path(X1, drop_prob)
#X2 = self.ops2[0] (X0+X1)
X2 = self.ops2[0] (s0)
X3 = self.ops2[1] (s1)
if self.training and drop_prob > 0.:
X2, X3 = drop_path(X2, drop_prob), drop_path(X3, drop_prob)
return torch.cat([X0, X1, X2, X3], dim=1)
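Every entry in OPS shares the constructor signature (C_in, C_out, stride, affine, track_running_stats), which is what lets the search and inference cells build candidate edges uniformly; a small shape check, assuming a hypothetical import path:

import torch
from models.cell_operations import OPS   # hypothetical import path

x = torch.randn(2, 16, 32, 32)
for name in ('nor_conv_3x3', 'avg_pool_3x3', 'skip_connect', 'none'):
    op = OPS[name](16, 16, 2, True, True)   # the stride-2 variant of each op
    print(name, op(x).shape)                # every op yields torch.Size([2, 16, 16, 16])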

View File

@@ -0,0 +1,24 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
# The macro structure is defined in NAS-Bench-201
from .search_model_darts import TinyNetworkDarts
from .search_model_gdas import TinyNetworkGDAS
from .search_model_setn import TinyNetworkSETN
from .search_model_enas import TinyNetworkENAS
from .search_model_random import TinyNetworkRANDOM
from .genotypes import Structure as CellStructure, architectures as CellArchitectures
# NASNet-based macro structure
from .search_model_gdas_nasnet import NASNetworkGDAS
from .search_model_darts_nasnet import NASNetworkDARTS
nas201_super_nets = {'DARTS-V1': TinyNetworkDarts,
"DARTS-V2": TinyNetworkDarts,
"GDAS": TinyNetworkGDAS,
"SETN": TinyNetworkSETN,
"ENAS": TinyNetworkENAS,
"RANDOM": TinyNetworkRANDOM}
nasnet_super_nets = {"GDAS": NASNetworkGDAS,
"DARTS": NASNetworkDARTS}

View File

@@ -0,0 +1,12 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import torch
from search_model_enas_utils import Controller
def main():
controller = Controller(6, 4)
predictions = controller()
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,199 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from copy import deepcopy
def get_combination(space, num):
combs = []
for i in range(num):
if i == 0:
for func in space:
combs.append( [(func, i)] )
else:
new_combs = []
for string in combs:
for func in space:
xstring = string + [(func, i)]
new_combs.append( xstring )
combs = new_combs
return combs
class Structure:
def __init__(self, genotype):
assert isinstance(genotype, list) or isinstance(genotype, tuple), 'invalid class of genotype : {:}'.format(type(genotype))
self.node_num = len(genotype) + 1
self.nodes = []
self.node_N = []
for idx, node_info in enumerate(genotype):
assert isinstance(node_info, list) or isinstance(node_info, tuple), 'invalid class of node_info : {:}'.format(type(node_info))
assert len(node_info) >= 1, 'invalid length : {:}'.format(len(node_info))
for node_in in node_info:
assert isinstance(node_in, list) or isinstance(node_in, tuple), 'invalid class of in-node : {:}'.format(type(node_in))
assert len(node_in) == 2 and node_in[1] <= idx, 'invalid in-node : {:}'.format(node_in)
self.node_N.append( len(node_info) )
self.nodes.append( tuple(deepcopy(node_info)) )
def tolist(self, remove_str):
# convert this class to a list; if remove_str is given (e.g., 'none'), drop that operation
# note that we re-order the input nodes in this function
# return (genotype-list, success); success is False when a node loses all of its incoming edges
genotypes = []
for node_info in self.nodes:
node_info = list( node_info )
node_info = sorted(node_info, key=lambda x: (x[1], x[0]))
node_info = tuple(filter(lambda x: x[0] != remove_str, node_info))
if len(node_info) == 0: return None, False
genotypes.append( node_info )
return genotypes, True
def node(self, index):
assert index > 0 and index <= len(self), 'invalid index={:} < {:}'.format(index, len(self))
return self.nodes[index]
def tostr(self):
strings = []
for node_info in self.nodes:
string = '|'.join([x[0]+'~{:}'.format(x[1]) for x in node_info])
string = '|{:}|'.format(string)
strings.append( string )
return '+'.join(strings)
def check_valid(self):
nodes = {0: True}
for i, node_info in enumerate(self.nodes):
sums = []
for op, xin in node_info:
if op == 'none' or nodes[xin] is False: x = False
else: x = True
sums.append( x )
nodes[i+1] = sum(sums) > 0
return nodes[len(self.nodes)]
def to_unique_str(self, consider_zero=False):
# this is used to identify the isomorphic cell, which requires prior knowledge of the operations
# two operations are special, i.e., none and skip_connect
nodes = {0: '0'}
for i_node, node_info in enumerate(self.nodes):
cur_node = []
for op, xin in node_info:
if consider_zero is None:
x = '('+nodes[xin]+')' + '@{:}'.format(op)
elif consider_zero:
if op == 'none' or nodes[xin] == '#': x = '#' # zero
elif op == 'skip_connect': x = nodes[xin]
else: x = '('+nodes[xin]+')' + '@{:}'.format(op)
else:
if op == 'skip_connect': x = nodes[xin]
else: x = '('+nodes[xin]+')' + '@{:}'.format(op)
cur_node.append(x)
nodes[i_node+1] = '+'.join( sorted(cur_node) )
return nodes[ len(self.nodes) ]
def check_valid_op(self, op_names):
for node_info in self.nodes:
for inode_edge in node_info:
#assert inode_edge[0] in op_names, 'invalid op-name : {:}'.format(inode_edge[0])
if inode_edge[0] not in op_names: return False
return True
def __repr__(self):
return ('{name}({node_num} nodes with {node_info})'.format(name=self.__class__.__name__, node_info=self.tostr(), **self.__dict__))
def __len__(self):
return len(self.nodes) + 1
def __getitem__(self, index):
return self.nodes[index]
@staticmethod
def str2structure(xstr):
assert isinstance(xstr, str), 'must take string (not {:}) as input'.format(type(xstr))
nodestrs = xstr.split('+')
genotypes = []
for i, node_str in enumerate(nodestrs):
inputs = list(filter(lambda x: x != '', node_str.split('|')))
for xinput in inputs: assert len(xinput.split('~')) == 2, 'invalid input length : {:}'.format(xinput)
inputs = ( xi.split('~') for xi in inputs )
input_infos = tuple( (op, int(IDX)) for (op, IDX) in inputs)
genotypes.append( input_infos )
return Structure( genotypes )
@staticmethod
def str2fullstructure(xstr, default_name='none'):
assert isinstance(xstr, str), 'must take string (not {:}) as input'.format(type(xstr))
nodestrs = xstr.split('+')
genotypes = []
for i, node_str in enumerate(nodestrs):
inputs = list(filter(lambda x: x != '', node_str.split('|')))
for xinput in inputs: assert len(xinput.split('~')) == 2, 'invalid input length : {:}'.format(xinput)
inputs = ( xi.split('~') for xi in inputs )
input_infos = list( (op, int(IDX)) for (op, IDX) in inputs)
all_in_nodes= list(x[1] for x in input_infos)
for j in range(i):
if j not in all_in_nodes: input_infos.append((default_name, j))
node_info = sorted(input_infos, key=lambda x: (x[1], x[0]))
genotypes.append( tuple(node_info) )
return Structure( genotypes )
@staticmethod
def gen_all(search_space, num, return_ori):
assert isinstance(search_space, list) or isinstance(search_space, tuple), 'invalid class of search-space : {:}'.format(type(search_space))
assert num >= 2, 'There should be at least two nodes in a neural cell instead of {:}'.format(num)
all_archs = get_combination(search_space, 1)
for i, arch in enumerate(all_archs):
all_archs[i] = [ tuple(arch) ]
for inode in range(2, num):
cur_nodes = get_combination(search_space, inode)
new_all_archs = []
for previous_arch in all_archs:
for cur_node in cur_nodes:
new_all_archs.append( previous_arch + [tuple(cur_node)] )
all_archs = new_all_archs
if return_ori:
return all_archs
else:
return [Structure(x) for x in all_archs]
ResNet_CODE = Structure(
[(('nor_conv_3x3', 0), ), # node-1
(('nor_conv_3x3', 1), ), # node-2
(('skip_connect', 0), ('skip_connect', 2))] # node-3
)
AllConv3x3_CODE = Structure(
[(('nor_conv_3x3', 0), ), # node-1
(('nor_conv_3x3', 0), ('nor_conv_3x3', 1)), # node-2
(('nor_conv_3x3', 0), ('nor_conv_3x3', 1), ('nor_conv_3x3', 2))] # node-3
)
AllFull_CODE = Structure(
[(('skip_connect', 0), ('nor_conv_1x1', 0), ('nor_conv_3x3', 0), ('avg_pool_3x3', 0)), # node-1
(('skip_connect', 0), ('nor_conv_1x1', 0), ('nor_conv_3x3', 0), ('avg_pool_3x3', 0), ('skip_connect', 1), ('nor_conv_1x1', 1), ('nor_conv_3x3', 1), ('avg_pool_3x3', 1)), # node-2
(('skip_connect', 0), ('nor_conv_1x1', 0), ('nor_conv_3x3', 0), ('avg_pool_3x3', 0), ('skip_connect', 1), ('nor_conv_1x1', 1), ('nor_conv_3x3', 1), ('avg_pool_3x3', 1), ('skip_connect', 2), ('nor_conv_1x1', 2), ('nor_conv_3x3', 2), ('avg_pool_3x3', 2))] # node-3
)
AllConv1x1_CODE = Structure(
[(('nor_conv_1x1', 0), ), # node-1
(('nor_conv_1x1', 0), ('nor_conv_1x1', 1)), # node-2
(('nor_conv_1x1', 0), ('nor_conv_1x1', 1), ('nor_conv_1x1', 2))] # node-3
)
AllIdentity_CODE = Structure(
[(('skip_connect', 0), ), # node-1
(('skip_connect', 0), ('skip_connect', 1)), # node-2
(('skip_connect', 0), ('skip_connect', 1), ('skip_connect', 2))] # node-3
)
architectures = {'resnet' : ResNet_CODE,
'all_c3x3': AllConv3x3_CODE,
'all_c1x1': AllConv1x1_CODE,
'all_idnt': AllIdentity_CODE,
'all_full': AllFull_CODE}

View File

@@ -0,0 +1,197 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, random, torch
import warnings
import torch.nn as nn
import torch.nn.functional as F
from copy import deepcopy
from ..cell_operations import OPS
# This module is used for NAS-Bench-201; it represents the small cell search space as a complete DAG
class NAS201SearchCell(nn.Module):
def __init__(self, C_in, C_out, stride, max_nodes, op_names, affine=False, track_running_stats=True):
super(NAS201SearchCell, self).__init__()
self.op_names = deepcopy(op_names)
self.edges = nn.ModuleDict()
self.max_nodes = max_nodes
self.in_dim = C_in
self.out_dim = C_out
for i in range(1, max_nodes):
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
if j == 0:
xlists = [OPS[op_name](C_in , C_out, stride, affine, track_running_stats) for op_name in op_names]
else:
xlists = [OPS[op_name](C_in , C_out, 1, affine, track_running_stats) for op_name in op_names]
self.edges[ node_str ] = nn.ModuleList( xlists )
self.edge_keys = sorted(list(self.edges.keys()))
self.edge2index = {key:i for i, key in enumerate(self.edge_keys)}
self.num_edges = len(self.edges)
def extra_repr(self):
string = 'info :: {max_nodes} nodes, inC={in_dim}, outC={out_dim}'.format(**self.__dict__)
return string
def forward(self, inputs, weightss):
nodes = [inputs]
for i in range(1, self.max_nodes):
inter_nodes = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
weights = weightss[ self.edge2index[node_str] ]
inter_nodes.append( sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) )
nodes.append( sum(inter_nodes) )
return nodes[-1]
# GDAS
def forward_gdas(self, inputs, hardwts, index):
nodes = [inputs]
for i in range(1, self.max_nodes):
inter_nodes = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
weights = hardwts[ self.edge2index[node_str] ]
argmaxs = index[ self.edge2index[node_str] ].item()
weigsum = sum( weights[_ie] * edge(nodes[j]) if _ie == argmaxs else weights[_ie] for _ie, edge in enumerate(self.edges[node_str]) )
inter_nodes.append( weigsum )
nodes.append( sum(inter_nodes) )
return nodes[-1]
# joint
def forward_joint(self, inputs, weightss):
nodes = [inputs]
for i in range(1, self.max_nodes):
inter_nodes = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
weights = weightss[ self.edge2index[node_str] ]
#aggregation = sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) / weights.numel()
aggregation = sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) )
inter_nodes.append( aggregation )
nodes.append( sum(inter_nodes) )
return nodes[-1]
# uniform random sampling per iteration, SETN
def forward_urs(self, inputs):
nodes = [inputs]
for i in range(1, self.max_nodes):
while True: # resample until at least one chosen op is not the zero op
sops, has_non_zero = [], False
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
candidates = self.edges[node_str]
select_op = random.choice(candidates)
sops.append( select_op )
if not hasattr(select_op, 'is_zero') or select_op.is_zero is False: has_non_zero=True
if has_non_zero: break
inter_nodes = []
for j, select_op in enumerate(sops):
inter_nodes.append( select_op(nodes[j]) )
nodes.append( sum(inter_nodes) )
return nodes[-1]
# select the argmax
def forward_select(self, inputs, weightss):
nodes = [inputs]
for i in range(1, self.max_nodes):
inter_nodes = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
weights = weightss[ self.edge2index[node_str] ]
inter_nodes.append( self.edges[node_str][ weights.argmax().item() ]( nodes[j] ) )
#inter_nodes.append( sum( layer(nodes[j]) * w for layer, w in zip(self.edges[node_str], weights) ) )
nodes.append( sum(inter_nodes) )
return nodes[-1]
# forward with a specific structure
def forward_dynamic(self, inputs, structure):
nodes = [inputs]
for i in range(1, self.max_nodes):
cur_op_node = structure.nodes[i-1]
inter_nodes = []
for op_name, j in cur_op_node:
node_str = '{:}<-{:}'.format(i, j)
op_index = self.op_names.index( op_name )
inter_nodes.append( self.edges[node_str][op_index]( nodes[j] ) )
nodes.append( sum(inter_nodes) )
return nodes[-1]
class MixedOp(nn.Module):
def __init__(self, space, C, stride, affine, track_running_stats):
super(MixedOp, self).__init__()
self._ops = nn.ModuleList()
for primitive in space:
op = OPS[primitive](C, C, stride, affine, track_running_stats)
self._ops.append(op)
def forward_gdas(self, x, weights, index):
return self._ops[index](x) * weights[index]
def forward_darts(self, x, weights):
return sum(w * op(x) for w, op in zip(weights, self._ops))
# Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018
class NASNetSearchCell(nn.Module):
def __init__(self, space, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev, affine, track_running_stats):
super(NASNetSearchCell, self).__init__()
self.reduction = reduction
self.op_names = deepcopy(space)
if reduction_prev: self.preprocess0 = OPS['skip_connect'](C_prev_prev, C, 2, affine, track_running_stats)
else : self.preprocess0 = OPS['nor_conv_1x1'](C_prev_prev, C, 1, affine, track_running_stats)
self.preprocess1 = OPS['nor_conv_1x1'](C_prev, C, 1, affine, track_running_stats)
self._steps = steps
self._multiplier = multiplier
self._ops = nn.ModuleList()
self.edges = nn.ModuleDict()
for i in range(self._steps):
for j in range(2+i):
node_str = '{:}<-{:}'.format(i, j) # indicate the edge from node-(j) to node-(i+2)
stride = 2 if reduction and j < 2 else 1
op = MixedOp(space, C, stride, affine, track_running_stats)
self.edges[ node_str ] = op
self.edge_keys = sorted(list(self.edges.keys()))
self.edge2index = {key:i for i, key in enumerate(self.edge_keys)}
self.num_edges = len(self.edges)
def forward_gdas(self, s0, s1, weightss, indexs):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)
states = [s0, s1]
for i in range(self._steps):
clist = []
for j, h in enumerate(states):
node_str = '{:}<-{:}'.format(i, j)
op = self.edges[ node_str ]
weights = weightss[ self.edge2index[node_str] ]
index = indexs[ self.edge2index[node_str] ].item()
clist.append( op.forward_gdas(h, weights, index) )
states.append( sum(clist) )
return torch.cat(states[-self._multiplier:], dim=1)
def forward_darts(self, s0, s1, weightss):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)
states = [s0, s1]
for i in range(self._steps):
clist = []
for j, h in enumerate(states):
node_str = '{:}<-{:}'.format(i, j)
op = self.edges[ node_str ]
weights = weightss[ self.edge2index[node_str] ]
clist.append( op.forward_darts(h, weights) )
states.append( sum(clist) )
return torch.cat(states[-self._multiplier:], dim=1)

View File

@@ -0,0 +1,97 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
########################################################
# DARTS: Differentiable Architecture Search, ICLR 2019 #
########################################################
import torch
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import ResNetBasicblock
from .search_cells import NAS201SearchCell as SearchCell
from .genotypes import Structure
class TinyNetworkDarts(nn.Module):
def __init__(self, C, N, max_nodes, num_classes, search_space, affine, track_running_stats):
super(TinyNetworkDarts, self).__init__()
self._C = C
self._layerN = N
self.max_nodes = max_nodes
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev, num_edge, edge2index = C, None, None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2)
else:
cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev = cell.out_dim
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
def get_weights(self):
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def get_alphas(self):
return [self.arch_parameters]
def show_alphas(self):
with torch.no_grad():
return 'arch-parameters :\n{:}'.format( nn.functional.softmax(self.arch_parameters, dim=-1).cpu() )
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def genotype(self):
genotypes = []
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
with torch.no_grad():
weights = self.arch_parameters[ self.edge2index[node_str] ]
op_name = self.op_names[ weights.argmax().item() ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
return Structure( genotypes )
def forward(self, inputs):
alphas = nn.functional.softmax(self.arch_parameters, dim=-1)
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
if isinstance(cell, SearchCell):
feature = cell(feature, alphas)
else:
feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
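A rough sketch of the alternating first-order update that DARTS-style search typically performs with the two parameter groups exposed by get_weights() and get_alphas(); the optimizers, hyper-parameters, and data loaders below are placeholders, not defined in this file:

criterion = torch.nn.CrossEntropyLoss()
w_optimizer = torch.optim.SGD(network.get_weights(), lr=0.025, momentum=0.9)    # placeholder hyper-parameters
a_optimizer = torch.optim.Adam(network.get_alphas(), lr=3e-4, weight_decay=1e-3)
for (x_train, y_train), (x_val, y_val) in zip(train_loader, valid_loader):
    # architecture step on held-out data
    a_optimizer.zero_grad()
    _, logits = network(x_val)
    criterion(logits, y_val).backward()
    a_optimizer.step()
    # weight step on training data
    w_optimizer.zero_grad()
    _, logits = network(x_train)
    criterion(logits, y_train).backward()
    w_optimizer.step()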

View File

@@ -0,0 +1,108 @@
####################
# DARTS, ICLR 2019 #
####################
import torch
import torch.nn as nn
from copy import deepcopy
from typing import List, Text, Dict
from .search_cells import NASNetSearchCell as SearchCell
# The macro structure is based on NASNet
class NASNetworkDARTS(nn.Module):
def __init__(self, C: int, N: int, steps: int, multiplier: int, stem_multiplier: int,
num_classes: int, search_space: List[Text], affine: bool, track_running_stats: bool):
super(NASNetworkDARTS, self).__init__()
self._C = C
self._layerN = N
self._steps = steps
self._multiplier = multiplier
self.stem = nn.Sequential(
nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C*stem_multiplier))
# config for each layer
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1)
layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1)
num_edge, edge2index = None, None
C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
cell = SearchCell(search_space, steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev_prev, C_prev, reduction_prev = C_prev, multiplier*C_curr, reduction
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_normal_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.arch_reduce_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
def get_weights(self) -> List[torch.nn.Parameter]:
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def get_alphas(self) -> List[torch.nn.Parameter]:
return [self.arch_normal_parameters, self.arch_reduce_parameters]
def show_alphas(self) -> Text:
with torch.no_grad():
A = 'arch-normal-parameters :\n{:}'.format( nn.functional.softmax(self.arch_normal_parameters, dim=-1).cpu() )
B = 'arch-reduce-parameters :\n{:}'.format( nn.functional.softmax(self.arch_reduce_parameters, dim=-1).cpu() )
return '{:}\n{:}'.format(A, B)
def get_message(self) -> Text:
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self) -> Text:
return ('{name}(C={_C}, N={_layerN}, steps={_steps}, multiplier={_multiplier}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def genotype(self) -> Dict[Text, List]:
def _parse(weights):
gene = []
for i in range(self._steps):
edges = []
for j in range(2+i):
node_str = '{:}<-{:}'.format(i, j)
ws = weights[ self.edge2index[node_str] ]
for k, op_name in enumerate(self.op_names):
if op_name == 'none': continue
edges.append( (op_name, j, ws[k]) )
edges = sorted(edges, key=lambda x: -x[-1])
selected_edges = edges[:2]
gene.append( tuple(selected_edges) )
return gene
with torch.no_grad():
gene_normal = _parse(torch.softmax(self.arch_normal_parameters, dim=-1).cpu().numpy())
gene_reduce = _parse(torch.softmax(self.arch_reduce_parameters, dim=-1).cpu().numpy())
return {'normal': gene_normal, 'normal_concat': list(range(2+self._steps-self._multiplier, self._steps+2)),
'reduce': gene_reduce, 'reduce_concat': list(range(2+self._steps-self._multiplier, self._steps+2))}
def forward(self, inputs):
normal_w = nn.functional.softmax(self.arch_normal_parameters, dim=1)
reduce_w = nn.functional.softmax(self.arch_reduce_parameters, dim=1)
s0 = s1 = self.stem(inputs)
for i, cell in enumerate(self.cells):
if cell.reduction: ww = reduce_w
else : ww = normal_w
s0, s1 = s1, cell.forward_darts(s0, s1, ww)
out = self.lastact(s1)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits

View File

@@ -0,0 +1,94 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##########################################################################
# Efficient Neural Architecture Search via Parameters Sharing, ICML 2018 #
##########################################################################
import torch
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import ResNetBasicblock
from .search_cells import NAS201SearchCell as SearchCell
from .genotypes import Structure
from .search_model_enas_utils import Controller
class TinyNetworkENAS(nn.Module):
def __init__(self, C, N, max_nodes, num_classes, search_space, affine, track_running_stats):
super(TinyNetworkENAS, self).__init__()
self._C = C
self._layerN = N
self.max_nodes = max_nodes
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev, num_edge, edge2index = C, None, None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2)
else:
cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev = cell.out_dim
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
# to maintain the sampled architecture
self.sampled_arch = None
def update_arch(self, _arch):
if _arch is None:
self.sampled_arch = None
elif isinstance(_arch, Structure):
self.sampled_arch = _arch
elif isinstance(_arch, (list, tuple)):
genotypes = []
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
op_index = _arch[ self.edge2index[node_str] ]
op_name = self.op_names[ op_index ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
self.sampled_arch = Structure(genotypes)
else:
raise ValueError('invalid type of input architecture : {:}'.format(_arch))
return self.sampled_arch
def create_controller(self):
return Controller(len(self.edge2index), len(self.op_names))
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def forward(self, inputs):
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
if isinstance(cell, SearchCell):
feature = cell.forward_dynamic(feature, self.sampled_arch)
else: feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
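A hedged sketch of how this shared-weight network interacts with the controller during search: the controller samples one op index per edge, the sample is cached via update_arch, and the forward pass then runs only that sub-network (the reward computation is a placeholder):

controller = network.create_controller()
log_prob, entropy, sampled_ops = controller()   # one op index per edge
network.update_arch(sampled_ops)                # converted to a Structure and cached
_, logits = network(inputs)                     # forward_dynamic on the cached architecture
reward = accuracy(logits, targets)              # placeholder reward, e.g. top-1 accuracy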

View File

@@ -0,0 +1,55 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##########################################################################
# Efficient Neural Architecture Search via Parameters Sharing, ICML 2018 #
##########################################################################
import torch
import torch.nn as nn
from torch.distributions.categorical import Categorical
class Controller(nn.Module):
# we refer to https://github.com/TDeVries/enas_pytorch/blob/master/models/controller.py
def __init__(self, num_edge, num_ops, lstm_size=32, lstm_num_layers=2, tanh_constant=2.5, temperature=5.0):
super(Controller, self).__init__()
# assign the attributes
self.num_edge = num_edge
self.num_ops = num_ops
self.lstm_size = lstm_size
self.lstm_N = lstm_num_layers
self.tanh_constant = tanh_constant
self.temperature = temperature
# create parameters
self.register_parameter('input_vars', nn.Parameter(torch.Tensor(1, 1, lstm_size)))
self.w_lstm = nn.LSTM(input_size=self.lstm_size, hidden_size=self.lstm_size, num_layers=self.lstm_N)
self.w_embd = nn.Embedding(self.num_ops, self.lstm_size)
self.w_pred = nn.Linear(self.lstm_size, self.num_ops)
nn.init.uniform_(self.input_vars , -0.1, 0.1)
nn.init.uniform_(self.w_lstm.weight_hh_l0, -0.1, 0.1)
nn.init.uniform_(self.w_lstm.weight_ih_l0, -0.1, 0.1)
nn.init.uniform_(self.w_embd.weight , -0.1, 0.1)
nn.init.uniform_(self.w_pred.weight , -0.1, 0.1)
def forward(self):
inputs, h0 = self.input_vars, None
log_probs, entropys, sampled_arch = [], [], []
for iedge in range(self.num_edge):
outputs, h0 = self.w_lstm(inputs, h0)
logits = self.w_pred(outputs)
logits = logits / self.temperature
logits = self.tanh_constant * torch.tanh(logits)
# distribution
op_distribution = Categorical(logits=logits)
op_index = op_distribution.sample()
sampled_arch.append( op_index.item() )
op_log_prob = op_distribution.log_prob(op_index)
log_probs.append( op_log_prob.view(-1) )
op_entropy = op_distribution.entropy()
entropys.append( op_entropy.view(-1) )
# obtain the input embedding for the next step
inputs = self.w_embd(op_index)
return torch.sum(torch.cat(log_probs)), torch.sum(torch.cat(entropys)), sampled_arch
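The controller itself is usually trained with REINFORCE against a moving-average baseline; a minimal sketch (the reward function is a placeholder and the coefficients are illustrative):

ctl_optimizer = torch.optim.Adam(controller.parameters(), lr=1e-3)
baseline = None
for step in range(500):
    log_prob, entropy, arch = controller()
    reward = evaluate_arch(arch)                 # placeholder: accuracy of the sampled architecture
    baseline = reward if baseline is None else 0.95 * baseline + 0.05 * reward
    loss = -log_prob * (reward - baseline) - 1e-4 * entropy
    ctl_optimizer.zero_grad()
    loss.backward()
    ctl_optimizer.step()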

View File

@@ -0,0 +1,111 @@
###########################################################################
# Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019 #
###########################################################################
import torch
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import ResNetBasicblock
from .search_cells import NAS201SearchCell as SearchCell
from .genotypes import Structure
class TinyNetworkGDAS(nn.Module):
#def __init__(self, C, N, max_nodes, num_classes, search_space, affine=False, track_running_stats=True):
def __init__(self, C, N, max_nodes, num_classes, search_space, affine, track_running_stats):
super(TinyNetworkGDAS, self).__init__()
self._C = C
self._layerN = N
self.max_nodes = max_nodes
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev, num_edge, edge2index = C, None, None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2)
else:
cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev = cell.out_dim
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.tau = 10
def get_weights(self):
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def set_tau(self, tau):
self.tau = tau
def get_tau(self):
return self.tau
def get_alphas(self):
return [self.arch_parameters]
def show_alphas(self):
with torch.no_grad():
return 'arch-parameters :\n{:}'.format( nn.functional.softmax(self.arch_parameters, dim=-1).cpu() )
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def genotype(self):
genotypes = []
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
with torch.no_grad():
weights = self.arch_parameters[ self.edge2index[node_str] ]
op_name = self.op_names[ weights.argmax().item() ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
return Structure( genotypes )
def forward(self, inputs):
while True:
gumbels = -torch.empty_like(self.arch_parameters).exponential_().log()
logits = (self.arch_parameters.log_softmax(dim=1) + gumbels) / self.tau
probs = nn.functional.softmax(logits, dim=1)
index = probs.max(-1, keepdim=True)[1]
one_h = torch.zeros_like(logits).scatter_(-1, index, 1.0)
hardwts = one_h - probs.detach() + probs
if (torch.isinf(gumbels).any()) or (torch.isinf(probs).any()) or (torch.isnan(probs).any()):
continue
else: break
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
if isinstance(cell, SearchCell):
feature = cell.forward_gdas(feature, hardwts, index)
else:
feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
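GDAS anneals the Gumbel-softmax temperature tau during search so that the relaxation gradually approaches a discrete choice; a sketch of a linear schedule around the set_tau/get_tau hooks above (the bounds and epoch count are illustrative):

tau_max, tau_min, total_epochs = 10.0, 0.1, 250
for epoch in range(total_epochs):
    tau = tau_max - (tau_max - tau_min) * epoch / (total_epochs - 1)
    network.set_tau(tau)
    # ... one epoch of alternating weight / architecture updates ...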

View File

@@ -0,0 +1,125 @@
###########################################################################
# Searching for A Robust Neural Architecture in Four GPU Hours, CVPR 2019 #
###########################################################################
import torch
import torch.nn as nn
from copy import deepcopy
from .search_cells import NASNetSearchCell as SearchCell
# The macro structure is based on NASNet
class NASNetworkGDAS(nn.Module):
def __init__(self, C, N, steps, multiplier, stem_multiplier, num_classes, search_space, affine, track_running_stats):
super(NASNetworkGDAS, self).__init__()
self._C = C
self._layerN = N
self._steps = steps
self._multiplier = multiplier
self.stem = nn.Sequential(
nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C*stem_multiplier))
# config for each layer
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1)
layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1)
num_edge, edge2index = None, None
C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
cell = SearchCell(search_space, steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev_prev, C_prev, reduction_prev = C_prev, multiplier*C_curr, reduction
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_normal_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.arch_reduce_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.tau = 10
def get_weights(self):
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def set_tau(self, tau):
self.tau = tau
def get_tau(self):
return self.tau
def get_alphas(self):
return [self.arch_normal_parameters, self.arch_reduce_parameters]
def show_alphas(self):
with torch.no_grad():
A = 'arch-normal-parameters :\n{:}'.format( nn.functional.softmax(self.arch_normal_parameters, dim=-1).cpu() )
B = 'arch-reduce-parameters :\n{:}'.format( nn.functional.softmax(self.arch_reduce_parameters, dim=-1).cpu() )
return '{:}\n{:}'.format(A, B)
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, N={_layerN}, steps={_steps}, multiplier={_multiplier}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def genotype(self):
def _parse(weights):
gene = []
for i in range(self._steps):
edges = []
for j in range(2+i):
node_str = '{:}<-{:}'.format(i, j)
ws = weights[ self.edge2index[node_str] ]
for k, op_name in enumerate(self.op_names):
if op_name == 'none': continue
edges.append( (op_name, j, ws[k]) )
edges = sorted(edges, key=lambda x: -x[-1])
selected_edges = edges[:2]
gene.append( tuple(selected_edges) )
return gene
with torch.no_grad():
gene_normal = _parse(torch.softmax(self.arch_normal_parameters, dim=-1).cpu().numpy())
gene_reduce = _parse(torch.softmax(self.arch_reduce_parameters, dim=-1).cpu().numpy())
return {'normal': gene_normal, 'normal_concat': list(range(2+self._steps-self._multiplier, self._steps+2)),
'reduce': gene_reduce, 'reduce_concat': list(range(2+self._steps-self._multiplier, self._steps+2))}
def forward(self, inputs):
def get_gumbel_prob(xins):
while True:
gumbels = -torch.empty_like(xins).exponential_().log()
logits = (xins.log_softmax(dim=1) + gumbels) / self.tau
probs = nn.functional.softmax(logits, dim=1)
index = probs.max(-1, keepdim=True)[1]
one_h = torch.zeros_like(logits).scatter_(-1, index, 1.0)
hardwts = one_h - probs.detach() + probs
if (torch.isinf(gumbels).any()) or (torch.isinf(probs).any()) or (torch.isnan(probs).any()):
continue
else: break
return hardwts, index
normal_hardwts, normal_index = get_gumbel_prob(self.arch_normal_parameters)
reduce_hardwts, reduce_index = get_gumbel_prob(self.arch_reduce_parameters)
s0 = s1 = self.stem(inputs)
for i, cell in enumerate(self.cells):
if cell.reduction: hardwts, index = reduce_hardwts, reduce_index
else : hardwts, index = normal_hardwts, normal_index
s0, s1 = s1, cell.forward_gdas(s0, s1, hardwts, index)
out = self.lastact(s1)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits

View File

@@ -0,0 +1,81 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##############################################################################
# Random Search and Reproducibility for Neural Architecture Search, UAI 2019 #
##############################################################################
import torch, random
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import ResNetBasicblock
from .search_cells import NAS201SearchCell as SearchCell
from .genotypes import Structure
class TinyNetworkRANDOM(nn.Module):
def __init__(self, C, N, max_nodes, num_classes, search_space, affine, track_running_stats):
super(TinyNetworkRANDOM, self).__init__()
self._C = C
self._layerN = N
self.max_nodes = max_nodes
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev, num_edge, edge2index = C, None, None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2)
else:
cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev = cell.out_dim
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_cache = None
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def random_genotype(self, set_cache):
genotypes = []
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
op_name = random.choice( self.op_names )
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
arch = Structure( genotypes )
if set_cache: self.arch_cache = arch
return arch
def forward(self, inputs):
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
if isinstance(cell, SearchCell):
feature = cell.forward_dynamic(feature, self.arch_cache)
else: feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
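Random search with shared weights then amounts to repeatedly sampling a genotype, caching it, and scoring the resulting sub-network; a sketch (the evaluation routine is a placeholder):

best_arch, best_acc = None, -1.0
for _ in range(100):                                   # number of random trials is illustrative
    arch = network.random_genotype(set_cache=True)     # sample and cache a random cell
    acc = evaluate_on_valid(network)                   # placeholder: forward uses the cached arch
    if acc > best_acc:
        best_arch, best_acc = arch, acc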

View File

@@ -0,0 +1,152 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
######################################################################################
# One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019 #
######################################################################################
import torch, random
import torch.nn as nn
from copy import deepcopy
from ..cell_operations import ResNetBasicblock
from .search_cells import NAS201SearchCell as SearchCell
from .genotypes import Structure
class TinyNetworkSETN(nn.Module):
def __init__(self, C, N, max_nodes, num_classes, search_space, affine, track_running_stats):
super(TinyNetworkSETN, self).__init__()
self._C = C
self._layerN = N
self.max_nodes = max_nodes
self.stem = nn.Sequential(
nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C))
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
C_prev, num_edge, edge2index = C, None, None
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
if reduction:
cell = ResNetBasicblock(C_prev, C_curr, 2)
else:
cell = SearchCell(C_prev, C_curr, 1, max_nodes, search_space, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev = cell.out_dim
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.mode = 'urs'
self.dynamic_cell = None
def set_cal_mode(self, mode, dynamic_cell=None):
assert mode in ['urs', 'joint', 'select', 'dynamic']
self.mode = mode
if mode == 'dynamic': self.dynamic_cell = deepcopy( dynamic_cell )
else : self.dynamic_cell = None
def get_cal_mode(self):
return self.mode
def get_weights(self):
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def get_alphas(self):
return [self.arch_parameters]
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, Max-Nodes={max_nodes}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
def genotype(self):
genotypes = []
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
with torch.no_grad():
weights = self.arch_parameters[ self.edge2index[node_str] ]
op_name = self.op_names[ weights.argmax().item() ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
return Structure( genotypes )
def dync_genotype(self, use_random=False):
genotypes = []
with torch.no_grad():
alphas_cpu = nn.functional.softmax(self.arch_parameters, dim=-1)
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
if use_random:
op_name = random.choice(self.op_names)
else:
weights = alphas_cpu[ self.edge2index[node_str] ]
op_index = torch.multinomial(weights, 1).item()
op_name = self.op_names[ op_index ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
return Structure( genotypes )
def get_log_prob(self, arch):
with torch.no_grad():
logits = nn.functional.log_softmax(self.arch_parameters, dim=-1)
select_logits = []
for i, node_info in enumerate(arch.nodes):
for op, xin in node_info:
node_str = '{:}<-{:}'.format(i+1, xin)
op_index = self.op_names.index(op)
select_logits.append( logits[self.edge2index[node_str], op_index] )
return sum(select_logits).item()
def return_topK(self, K):
archs = Structure.gen_all(self.op_names, self.max_nodes, False)
pairs = [(self.get_log_prob(arch), arch) for arch in archs]
if K < 0 or K >= len(archs): K = len(archs)
sorted_pairs = sorted(pairs, key=lambda x: -x[0])
return_pairs = [sorted_pairs[_][1] for _ in range(K)]
return return_pairs
def forward(self, inputs):
alphas = nn.functional.softmax(self.arch_parameters, dim=-1)
with torch.no_grad():
alphas_cpu = alphas.detach().cpu()
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
if isinstance(cell, SearchCell):
if self.mode == 'urs':
feature = cell.forward_urs(feature)
elif self.mode == 'select':
feature = cell.forward_select(feature, alphas_cpu)
elif self.mode == 'joint':
feature = cell.forward_joint(feature, alphas)
elif self.mode == 'dynamic':
feature = cell.forward_dynamic(feature, self.dynamic_cell)
else: raise ValueError('invalid mode={:}'.format(self.mode))
else: feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
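After the shared weights have been trained (typically in 'urs' mode) and the architecture distribution in 'joint' mode, SETN selects a final cell by scoring the most likely candidates; a sketch using return_topK and set_cal_mode (the validation routine is a placeholder):

candidates = network.return_topK(10)                   # K is illustrative
best_arch, best_acc = None, -1.0
for arch in candidates:
    network.set_cal_mode('dynamic', arch)              # evaluate exactly this cell
    acc = evaluate_on_valid(network)                   # placeholder validation routine
    if acc > best_acc:
        best_arch, best_acc = arch, acc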

View File

@@ -0,0 +1,139 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
######################################################################################
# One-Shot Neural Architecture Search via Self-Evaluated Template Network, ICCV 2019 #
######################################################################################
import torch
import torch.nn as nn
from copy import deepcopy
from typing import List, Text, Dict
from .search_cells import NASNetSearchCell as SearchCell
# The macro structure is based on NASNet
class NASNetworkSETN(nn.Module):
def __init__(self, C: int, N: int, steps: int, multiplier: int, stem_multiplier: int,
num_classes: int, search_space: List[Text], affine: bool, track_running_stats: bool):
super(NASNetworkSETN, self).__init__()
self._C = C
self._layerN = N
self._steps = steps
self._multiplier = multiplier
self.stem = nn.Sequential(
nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(C*stem_multiplier))
# config for each layer
layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1)
layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1)
num_edge, edge2index = None, None
C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False
self.cells = nn.ModuleList()
for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)):
cell = SearchCell(search_space, steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats)
if num_edge is None: num_edge, edge2index = cell.num_edges, cell.edge2index
else: assert num_edge == cell.num_edges and edge2index == cell.edge2index, 'invalid {:} vs. {:}.'.format(num_edge, cell.num_edges)
self.cells.append( cell )
C_prev_prev, C_prev, reduction_prev = C_prev, multiplier*C_curr, reduction
self.op_names = deepcopy( search_space )
self._Layer = len(self.cells)
self.edge2index = edge2index
self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(C_prev, num_classes)
self.arch_normal_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.arch_reduce_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(search_space)) )
self.mode = 'urs'
self.dynamic_cell = None
def set_cal_mode(self, mode, dynamic_cell=None):
assert mode in ['urs', 'joint', 'select', 'dynamic']
self.mode = mode
if mode == 'dynamic':
self.dynamic_cell = deepcopy(dynamic_cell)
else:
self.dynamic_cell = None
def get_weights(self):
xlist = list( self.stem.parameters() ) + list( self.cells.parameters() )
xlist+= list( self.lastact.parameters() ) + list( self.global_pooling.parameters() )
xlist+= list( self.classifier.parameters() )
return xlist
def get_alphas(self):
return [self.arch_normal_parameters, self.arch_reduce_parameters]
def show_alphas(self):
with torch.no_grad():
A = 'arch-normal-parameters :\n{:}'.format( nn.functional.softmax(self.arch_normal_parameters, dim=-1).cpu() )
B = 'arch-reduce-parameters :\n{:}'.format( nn.functional.softmax(self.arch_reduce_parameters, dim=-1).cpu() )
return '{:}\n{:}'.format(A, B)
def get_message(self):
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_C}, N={_layerN}, steps={_steps}, multiplier={_multiplier}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__))
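# NOTE: dync_genotype below references self.arch_parameters, self.max_nodes, random and Structure, which this NASNet-style class neither defines nor imports; it mirrors the NAS-Bench-201 variant and cannot run as written here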
def dync_genotype(self, use_random=False):
genotypes = []
with torch.no_grad():
alphas_cpu = nn.functional.softmax(self.arch_parameters, dim=-1)
for i in range(1, self.max_nodes):
xlist = []
for j in range(i):
node_str = '{:}<-{:}'.format(i, j)
if use_random:
op_name = random.choice(self.op_names)
else:
weights = alphas_cpu[ self.edge2index[node_str] ]
op_index = torch.multinomial(weights, 1).item()
op_name = self.op_names[ op_index ]
xlist.append((op_name, j))
genotypes.append( tuple(xlist) )
return Structure( genotypes )
def genotype(self):
def _parse(weights):
gene = []
for i in range(self._steps):
edges = []
for j in range(2+i):
node_str = '{:}<-{:}'.format(i, j)
ws = weights[ self.edge2index[node_str] ]
for k, op_name in enumerate(self.op_names):
if op_name == 'none': continue
edges.append( (op_name, j, ws[k]) )
edges = sorted(edges, key=lambda x: -x[-1])
selected_edges = edges[:2]
gene.append( tuple(selected_edges) )
return gene
with torch.no_grad():
gene_normal = _parse(torch.softmax(self.arch_normal_parameters, dim=-1).cpu().numpy())
gene_reduce = _parse(torch.softmax(self.arch_reduce_parameters, dim=-1).cpu().numpy())
return {'normal': gene_normal, 'normal_concat': list(range(2+self._steps-self._multiplier, self._steps+2)),
'reduce': gene_reduce, 'reduce_concat': list(range(2+self._steps-self._multiplier, self._steps+2))}
def forward(self, inputs):
normal_hardwts = nn.functional.softmax(self.arch_normal_parameters, dim=-1)
reduce_hardwts = nn.functional.softmax(self.arch_reduce_parameters, dim=-1)
s0 = s1 = self.stem(inputs)
for i, cell in enumerate(self.cells):
# [TODO]
raise NotImplementedError
if cell.reduction: hardwts, index = reduce_hardwts, reduce_index
else : hardwts, index = normal_hardwts, normal_index
s0, s1 = s1, cell.forward_gdas(s0, s1, hardwts, index)
out = self.lastact(s1)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits

View File

@@ -0,0 +1,62 @@
import torch
import torch.nn as nn
def copy_conv(module, init):
assert isinstance(module, nn.Conv2d), 'invalid module : {:}'.format(module)
assert isinstance(init , nn.Conv2d), 'invalid module : {:}'.format(init)
new_i, new_o = module.in_channels, module.out_channels
module.weight.copy_( init.weight.detach()[:new_o, :new_i] )
if module.bias is not None:
module.bias.copy_( init.bias.detach()[:new_o] )
def copy_bn (module, init):
assert isinstance(module, nn.BatchNorm2d), 'invalid module : {:}'.format(module)
assert isinstance(init , nn.BatchNorm2d), 'invalid module : {:}'.format(init)
num_features = module.num_features
if module.weight is not None:
module.weight.copy_( init.weight.detach()[:num_features] )
if module.bias is not None:
module.bias.copy_( init.bias.detach()[:num_features] )
if module.running_mean is not None:
module.running_mean.copy_( init.running_mean.detach()[:num_features] )
if module.running_var is not None:
module.running_var.copy_( init.running_var.detach()[:num_features] )
def copy_fc (module, init):
assert isinstance(module, nn.Linear), 'invalid module : {:}'.format(module)
assert isinstance(init , nn.Linear), 'invalid module : {:}'.format(init)
new_i, new_o = module.in_features, module.out_features
module.weight.copy_( init.weight.detach()[:new_o, :new_i] )
if module.bias is not None:
module.bias.copy_( init.bias.detach()[:new_o] )
def copy_base(module, init):
assert type(module).__name__ in ['ConvBNReLU', 'Downsample'], 'invalid module : {:}'.format(module)
assert type( init).__name__ in ['ConvBNReLU', 'Downsample'], 'invalid module : {:}'.format( init)
if module.conv is not None:
copy_conv(module.conv, init.conv)
if module.bn is not None:
copy_bn (module.bn, init.bn)
def copy_basic(module, init):
copy_base(module.conv_a, init.conv_a)
copy_base(module.conv_b, init.conv_b)
if module.downsample is not None:
if init.downsample is not None:
copy_base(module.downsample, init.downsample)
#else:
# import pdb; pdb.set_trace()
def init_from_model(network, init_model):
with torch.no_grad():
copy_fc(network.classifier, init_model.classifier)
for base, target in zip(init_model.layers, network.layers):
assert type(base).__name__ == type(target).__name__, 'invalid type : {:} vs {:}'.format(base, target)
if type(base).__name__ == 'ConvBNReLU':
copy_base(target, base)
elif type(base).__name__ == 'ResNetBasicblock':
copy_basic(target, base)
else:
raise ValueError('unknown type name : {:}'.format( type(base).__name__ ))
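A short sketch of the intended use of these helpers, warm-starting a (possibly narrower) network from a pretrained source model by copying overlapping weight slices; both constructors below are placeholders:

init_model = build_pretrained_model()    # placeholder: the wider source network
network    = build_target_model()        # placeholder: the target whose layers mirror the source
init_from_model(network, init_model)     # copies the classifier and per-layer Conv/BN slices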

View File

@@ -0,0 +1,18 @@
import torch
import torch.nn as nn
def initialize_resnet(m):
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
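The usual entry point is nn.Module.apply, which calls this function on every submodule; a minimal sketch with a placeholder constructor:

model = build_model()            # placeholder constructor
model.apply(initialize_resnet)   # Kaiming conv init, unit BatchNorm weights, small-normal Linear init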

View File

@@ -0,0 +1,167 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch.nn as nn
import torch.nn.functional as F
from ..initialization import initialize_resnet
class ConvBNReLU(nn.Module):
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
if has_bn : self.bn = nn.BatchNorm2d(nOut)
else : self.bn = None
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
def forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.bn : out = self.bn( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
return out
class ResNetBasicblock(nn.Module):
num_conv = 2
expansion = 1
def __init__(self, iCs, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 3,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_a = ConvBNReLU(iCs[0], iCs[1], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU(iCs[1], iCs[2], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
residual_in = iCs[2]
elif iCs[0] != iCs[2]:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[2])
self.out_dim = iCs[2]
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + basicblock
return F.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, iCs, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 4,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_1x1 = ConvBNReLU(iCs[0], iCs[1], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU(iCs[1], iCs[2], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(iCs[2], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=True , has_bn=False, has_relu=False)
residual_in = iCs[3]
elif iCs[0] != iCs[3]:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=False, has_relu=False)
residual_in = iCs[3]
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[3])
self.out_dim = iCs[3]
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + bottleneck
return F.relu(out, inplace=True)
class InferCifarResNet(nn.Module):
def __init__(self, block_name, depth, xblocks, xchannels, num_classes, zero_init_residual):
super(InferCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be one of 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
assert len(xblocks) == 3, 'invalid xblocks : {:}'.format(xblocks)
self.message = 'InferCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.xchannels = xchannels
self.layers = nn.ModuleList( [ ConvBNReLU(xchannels[0], xchannels[1], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
last_channel_idx = 1
for stage in range(3):
for iL in range(layer_blocks):
num_conv = block.num_conv
iCs = self.xchannels[last_channel_idx:last_channel_idx+num_conv+1]
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iCs, stride)
last_channel_idx += num_conv
self.xchannels[last_channel_idx] = module.out_dim
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iCs={:}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iCs, module.out_dim, stride)
if iL + 1 == xblocks[stage]: # reach the maximum depth
out_channel = module.out_dim
for iiL in range(iL+1, layer_blocks):
last_channel_idx += num_conv
self.xchannels[last_channel_idx] = module.out_dim
break
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(self.xchannels[-1], num_classes)
self.apply(initialize_resnet)
if zero_init_residual:
for m in self.modules():
if isinstance(m, ResNetBasicblock):
nn.init.constant_(m.conv_b.bn.weight, 0)
elif isinstance(m, ResNetBottleneck):
nn.init.constant_(m.conv_1x4.bn.weight, 0)
def get_message(self):
return self.message
def forward(self, inputs):
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits

View File

@@ -0,0 +1,150 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch.nn as nn
import torch.nn.functional as F
from ..initialization import initialize_resnet
class ConvBNReLU(nn.Module):
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
if has_bn : self.bn = nn.BatchNorm2d(nOut)
else : self.bn = None
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
def forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.bn : out = self.bn( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
return out
class ResNetBasicblock(nn.Module):
num_conv = 2
expansion = 1
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + basicblock
return F.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=True , has_bn=False, has_relu=False)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=False, has_relu=False)
else:
self.downsample = None
self.out_dim = planes*self.expansion
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + bottleneck
return F.relu(out, inplace=True)
class InferDepthCifarResNet(nn.Module):
def __init__(self, block_name, depth, xblocks, num_classes, zero_init_residual):
super(InferDepthCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be one of 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
assert len(xblocks) == 3, 'invalid xblocks : {:}'.format(xblocks)
self.message = 'InferDepthCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
self.channels = [16]
for stage in range(3):
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
if iL + 1 == xblocks[stage]: # reach the maximum depth
break
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(self.channels[-1], num_classes)
self.apply(initialize_resnet)
if zero_init_residual:
for m in self.modules():
if isinstance(m, ResNetBasicblock):
nn.init.constant_(m.conv_b.bn.weight, 0)
elif isinstance(m, ResNetBottleneck):
nn.init.constant_(m.conv_1x4.bn.weight, 0)
def get_message(self):
return self.message
def forward(self, inputs):
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits

View File

@@ -0,0 +1,160 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch.nn as nn
import torch.nn.functional as F
from ..initialization import initialize_resnet
class ConvBNReLU(nn.Module):
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
if has_bn : self.bn = nn.BatchNorm2d(nOut)
else : self.bn = None
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
def forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.bn : out = self.bn( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
return out
class ResNetBasicblock(nn.Module):
num_conv = 2
expansion = 1
def __init__(self, iCs, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 3,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_a = ConvBNReLU(iCs[0], iCs[1], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU(iCs[1], iCs[2], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
residual_in = iCs[2]
elif iCs[0] != iCs[2]:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[2])
self.out_dim = iCs[2]
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + basicblock
return F.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, iCs, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 4,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_1x1 = ConvBNReLU(iCs[0], iCs[1], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU(iCs[1], iCs[2], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(iCs[2], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=True , has_bn=False, has_relu=False)
residual_in = iCs[3]
elif iCs[0] != iCs[3]:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=False, has_relu=False)
residual_in = iCs[3]
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[3])
self.out_dim = iCs[3]
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + bottleneck
return F.relu(out, inplace=True)
class InferWidthCifarResNet(nn.Module):
def __init__(self, block_name, depth, xchannels, num_classes, zero_init_residual):
super(InferWidthCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be one of 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'InferWidthCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.xchannels = xchannels
self.layers = nn.ModuleList( [ ConvBNReLU(xchannels[0], xchannels[1], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
last_channel_idx = 1
for stage in range(3):
for iL in range(layer_blocks):
num_conv = block.num_conv
iCs = self.xchannels[last_channel_idx:last_channel_idx+num_conv+1]
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iCs, stride)
last_channel_idx += num_conv
self.xchannels[last_channel_idx] = module.out_dim
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iCs={:}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iCs, module.out_dim, stride)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(self.xchannels[-1], num_classes)
self.apply(initialize_resnet)
if zero_init_residual:
for m in self.modules():
if isinstance(m, ResNetBasicblock):
nn.init.constant_(m.conv_b.bn.weight, 0)
elif isinstance(m, ResNetBottleneck):
nn.init.constant_(m.conv_1x4.bn.weight, 0)
def get_message(self):
return self.message
def forward(self, inputs):
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits
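# Usage sketch (illustrative; the channel list below is an assumption consistent with the
# constructor above): len(xchannels) equals `depth` -- entry 0 is the input channels, entry 1
# the stem width, and every residual block consumes num_conv further entries, e.g.
#   net = InferWidthCifarResNet('ResNetBasicblock', 20, [3, 16] + [16]*6 + [32]*6 + [64]*6, 10, False)
#   features, logits = net(torch.randn(2, 3, 32, 32))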

View File

@@ -0,0 +1,170 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
import torch.nn as nn
import torch.nn.functional as F
from ..initialization import initialize_resnet
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
if has_bn : self.bn = nn.BatchNorm2d(nOut)
else : self.bn = None
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
def forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.bn : out = self.bn( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
return out
class ResNetBasicblock(nn.Module):
num_conv = 2
expansion = 1
def __init__(self, iCs, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 3,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_a = ConvBNReLU(iCs[0], iCs[1], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU(iCs[1], iCs[2], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=True, has_bn=True, has_relu=False)
residual_in = iCs[2]
elif iCs[0] != iCs[2]:
self.downsample = ConvBNReLU(iCs[0], iCs[2], 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[2])
self.out_dim = iCs[2]
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + basicblock
return F.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, iCs, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
assert isinstance(iCs, tuple) or isinstance(iCs, list), 'invalid type of iCs : {:}'.format( iCs )
assert len(iCs) == 4,'invalid lengths of iCs : {:}'.format(iCs)
self.conv_1x1 = ConvBNReLU(iCs[0], iCs[1], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU(iCs[1], iCs[2], 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(iCs[2], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[0]
if stride == 2:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=True , has_bn=True, has_relu=False)
residual_in = iCs[3]
elif iCs[0] != iCs[3]:
self.downsample = ConvBNReLU(iCs[0], iCs[3], 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
residual_in = iCs[3]
else:
self.downsample = None
#self.out_dim = max(residual_in, iCs[3])
self.out_dim = iCs[3]
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None:
residual = self.downsample(inputs)
else:
residual = inputs
out = residual + bottleneck
return F.relu(out, inplace=True)
class InferImagenetResNet(nn.Module):
def __init__(self, block_name, layers, xblocks, xchannels, deep_stem, num_classes, zero_init_residual):
super(InferImagenetResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'BasicBlock':
block = ResNetBasicblock
elif block_name == 'Bottleneck':
block = ResNetBottleneck
else:
raise ValueError('invalid block : {:}'.format(block_name))
assert len(xblocks) == len(layers), 'invalid layers : {:} vs xblocks : {:}'.format(layers, xblocks)
self.message = 'InferImagenetResNet : Depth : {:} -> {:}, Layers for each block : {:}'.format(sum(layers)*block.num_conv, sum(xblocks)*block.num_conv, xblocks)
self.num_classes = num_classes
self.xchannels = xchannels
if not deep_stem:
self.layers = nn.ModuleList( [ ConvBNReLU(xchannels[0], xchannels[1], 7, 2, 3, False, has_avg=False, has_bn=True, has_relu=True) ] )
last_channel_idx = 1
else:
self.layers = nn.ModuleList( [ ConvBNReLU(xchannels[0], xchannels[1], 3, 2, 1, False, has_avg=False, has_bn=True, has_relu=True)
,ConvBNReLU(xchannels[1], xchannels[2], 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
last_channel_idx = 2
self.layers.append( nn.MaxPool2d(kernel_size=3, stride=2, padding=1) )
for stage, layer_blocks in enumerate(layers):
for iL in range(layer_blocks):
num_conv = block.num_conv
iCs = self.xchannels[last_channel_idx:last_channel_idx+num_conv+1]
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iCs, stride)
last_channel_idx += num_conv
self.xchannels[last_channel_idx] = module.out_dim
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iCs={:}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iCs, module.out_dim, stride)
if iL + 1 == xblocks[stage]: # reach the maximum depth
out_channel = module.out_dim
for iiL in range(iL+1, layer_blocks):
last_channel_idx += num_conv
self.xchannels[last_channel_idx] = module.out_dim
break
assert last_channel_idx + 1 == len(self.xchannels), '{:} vs {:}'.format(last_channel_idx, len(self.xchannels))
self.avgpool = nn.AdaptiveAvgPool2d((1,1))
self.classifier = nn.Linear(self.xchannels[-1], num_classes)
self.apply(initialize_resnet)
if zero_init_residual:
for m in self.modules():
if isinstance(m, ResNetBasicblock):
nn.init.constant_(m.conv_b.bn.weight, 0)
elif isinstance(m, ResNetBottleneck):
nn.init.constant_(m.conv_1x4.bn.weight, 0)
def get_message(self):
return self.message
def forward(self, inputs):
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits
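# Note (illustrative): `layers` holds the full per-stage block counts and `xblocks[i] <= layers[i]`
# truncates stage i to its searched depth; `xchannels` is the same flat per-convolution channel
# list as in the CIFAR variant. With deep_stem=True, the single 7x7 stride-2 stem is replaced by
# two 3x3 convolutions before the max-pooling layer.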

View File

@@ -0,0 +1,122 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
# MobileNetV2: Inverted Residuals and Linear Bottlenecks, CVPR 2018
from torch import nn
from ..initialization import initialize_resnet
from ..SharedUtils import parse_channel_info
class ConvBNReLU(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, groups, has_bn=True, has_relu=True):
super(ConvBNReLU, self).__init__()
padding = (kernel_size - 1) // 2
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False)
if has_bn: self.bn = nn.BatchNorm2d(out_planes)
else : self.bn = None
if has_relu: self.relu = nn.ReLU6(inplace=True)
else : self.relu = None
def forward(self, x):
out = self.conv( x )
if self.bn: out = self.bn ( out )
if self.relu: out = self.relu( out )
return out
class InvertedResidual(nn.Module):
def __init__(self, channels, stride, expand_ratio, additive):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2], 'invalid stride : {:}'.format(stride)
assert len(channels) in [2, 3], 'invalid channels : {:}'.format(channels)
if len(channels) == 2:
layers = []
else:
layers = [ConvBNReLU(channels[0], channels[1], 1, 1, 1)]
layers.extend([
# dw
ConvBNReLU(channels[-2], channels[-2], 3, stride, channels[-2]),
# pw-linear
ConvBNReLU(channels[-2], channels[-1], 1, 1, 1, True, False),
])
self.conv = nn.Sequential(*layers)
self.additive = additive
if self.additive and channels[0] != channels[-1]:
self.shortcut = ConvBNReLU(channels[0], channels[-1], 1, 1, 1, True, False)
else:
self.shortcut = None
self.out_dim = channels[-1]
def forward(self, x):
out = self.conv(x)
# if self.additive: return additive_func(out, x)
if self.shortcut: return out + self.shortcut(x)
else : return out
class InferMobileNetV2(nn.Module):
def __init__(self, num_classes, xchannels, xblocks, dropout):
super(InferMobileNetV2, self).__init__()
block = InvertedResidual
inverted_residual_setting = [
# t, c, n, s
[1, 16 , 1, 1],
[6, 24 , 2, 2],
[6, 32 , 3, 2],
[6, 64 , 4, 2],
[6, 96 , 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
assert len(inverted_residual_setting) == len(xblocks), 'invalid number of layers : {:} vs {:}'.format(len(inverted_residual_setting), len(xblocks))
for block_num, ir_setting in zip(xblocks, inverted_residual_setting):
assert block_num <= ir_setting[2], '{:} vs {:}'.format(block_num, ir_setting)
xchannels = parse_channel_info(xchannels)
#for i, chs in enumerate(xchannels):
# if i > 0: assert chs[0] == xchannels[i-1][-1], 'Layer[{:}] is invalid {:} vs {:}'.format(i, xchannels[i-1], chs)
self.xchannels = xchannels
self.message = 'InferMobileNetV2 : xblocks={:}'.format(xblocks)
# building first layer
features = [ConvBNReLU(xchannels[0][0], xchannels[0][1], 3, 2, 1)]
last_channel_idx = 1
# building inverted residual blocks
for stage, (t, c, n, s) in enumerate(inverted_residual_setting):
for i in range(n):
stride = s if i == 0 else 1
additv = i > 0
module = block(self.xchannels[last_channel_idx], stride, t, additv)
features.append(module)
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, Cs={:}, stride={:}, expand={:}, original-C={:}".format(stage, i, n, len(features), self.xchannels[last_channel_idx], stride, t, c)
last_channel_idx += 1
if i + 1 == xblocks[stage]:
out_channel = module.out_dim
for iiL in range(i+1, n):
last_channel_idx += 1
self.xchannels[last_channel_idx][0] = module.out_dim
break
# building last several layers
features.append(ConvBNReLU(self.xchannels[last_channel_idx][0], self.xchannels[last_channel_idx][1], 1, 1, 1))
assert last_channel_idx + 2 == len(self.xchannels), '{:} vs {:}'.format(last_channel_idx, len(self.xchannels))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(self.xchannels[last_channel_idx][1], num_classes),
)
# weight initialization
self.apply( initialize_resnet )
def get_message(self):
return self.message
def forward(self, inputs):
features = self.features(inputs)
vectors = features.mean([2, 3])
predicts = self.classifier(vectors)
return features, predicts
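# Note (illustrative; the string format is inferred from parse_channel_info): `xchannels` is a
# space-separated list of '-'-joined channel groups, one group per layer. A 3-element group adds
# the 1x1 expansion conv inside InvertedResidual, a 2-element group omits it, and xblocks[stage]
# keeps only the first xblocks[stage] inverted-residual blocks of that stage.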

View File

@@ -0,0 +1,58 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
from typing import List, Text, Any
import torch.nn as nn
from models.cell_operations import ResNetBasicblock
from models.cell_infers.cells import InferCell
class DynamicShapeTinyNet(nn.Module):
def __init__(self, channels: List[int], genotype: Any, num_classes: int):
super(DynamicShapeTinyNet, self).__init__()
self._channels = channels
if len(channels) % 3 != 2:
raise ValueError('invalid number of layers : {:}'.format(len(channels)))
self._num_stage = N = len(channels) // 3
self.stem = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(channels[0]))
# layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N
layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N
c_prev = channels[0]
self.cells = nn.ModuleList()
for index, (c_curr, reduction) in enumerate(zip(channels, layer_reductions)):
if reduction : cell = ResNetBasicblock(c_prev, c_curr, 2, True)
else : cell = InferCell(genotype, c_prev, c_curr, 1)
self.cells.append( cell )
c_prev = cell.out_dim
self._num_layer = len(self.cells)
self.lastact = nn.Sequential(nn.BatchNorm2d(c_prev), nn.ReLU(inplace=True))
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(c_prev, num_classes)
def get_message(self) -> Text:
string = self.extra_repr()
for i, cell in enumerate(self.cells):
string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr())
return string
def extra_repr(self):
return ('{name}(C={_channels}, N={_num_stage}, L={_num_layer})'.format(name=self.__class__.__name__, **self.__dict__))
def forward(self, inputs):
feature = self.stem(inputs)
for i, cell in enumerate(self.cells):
feature = cell(feature)
out = self.lastact(feature)
out = self.global_pooling( out )
out = out.view(out.size(0), -1)
logits = self.classifier(out)
return out, logits
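# Usage sketch (illustrative; `genotype` must be an architecture object accepted by InferCell):
# `channels` needs 3*N+2 entries, with the two reduction cells fixed at positions N and 2N+1, e.g.
#   net = DynamicShapeTinyNet([16]*5 + [32] + [32]*5 + [64] + [64]*5, genotype, 10)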

View File

@@ -0,0 +1,9 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 #
#####################################################
from .InferCifarResNet_width import InferWidthCifarResNet
from .InferImagenetResNet import InferImagenetResNet
from .InferCifarResNet_depth import InferDepthCifarResNet
from .InferCifarResNet import InferCifarResNet
from .InferMobileNetV2 import InferMobileNetV2
from .InferTinyCellNet import DynamicShapeTinyNet

View File

@@ -0,0 +1,5 @@
def parse_channel_info(xstring):
blocks = xstring.split(' ')
blocks = [x.split('-') for x in blocks]
blocks = [[int(_) for _ in x] for x in blocks]
return blocks
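# Example (illustrative): parse_channel_info('3-16 16-16 16-24-24') -> [[3, 16], [16, 16], [16, 24, 24]]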

View File

@@ -0,0 +1,502 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
from collections import OrderedDict
from bisect import bisect_right
import torch.nn as nn
from ..initialization import initialize_resnet
from ..SharedUtils import additive_func
from .SoftSelect import select2withP, ChannelWiseInter
from .SoftSelect import linear_forward
from .SoftSelect import get_width_choices
def get_depth_choices(nDepth, return_num):
if nDepth == 2:
choices = (1, 2)
elif nDepth == 3:
choices = (1, 2, 3)
elif nDepth > 3:
choices = list(range(1, nDepth+1, 2))
if choices[-1] < nDepth: choices.append(nDepth)
else:
raise ValueError('invalid nDepth : {:}'.format(nDepth))
if return_num: return len(choices)
else : return choices
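# Example (illustrative): get_depth_choices(18, False) -> [1, 3, 5, 7, 9, 11, 13, 15, 17, 18],
# while get_depth_choices(18, True) returns only the number of choices (10).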
def conv_forward(inputs, conv, choices):
iC = conv.in_channels
fill_size = list(inputs.size())
fill_size[1] = iC - fill_size[1]
filled = torch.zeros(fill_size, device=inputs.device)
xinputs = torch.cat((inputs, filled), dim=1)
outputs = conv(xinputs)
selecteds = [outputs[:,:oC] for oC in choices]
return selecteds
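# conv_forward evaluates several candidate output widths with a single shared convolution:
# the input is zero-padded up to conv.in_channels, convolved once, and the result is sliced
# to the first oC channels for every candidate width in `choices`.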
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
self.InShape = None
self.OutShape = None
self.choices = get_width_choices(nOut)
self.register_buffer('choices_tensor', torch.Tensor( self.choices ))
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
#if has_bn : self.bn = nn.BatchNorm2d(nOut)
#else : self.bn = None
self.has_bn = has_bn
self.BNs = nn.ModuleList()
for i, _out in enumerate(self.choices):
self.BNs.append(nn.BatchNorm2d(_out))
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
self.in_dim = nIn
self.out_dim = nOut
self.search_mode = 'basic'
def get_flops(self, channels, check_range=True, divide=1):
iC, oC = channels
if check_range: assert iC <= self.conv.in_channels and oC <= self.conv.out_channels, '{:} vs {:} | {:} vs {:}'.format(iC, self.conv.in_channels, oC, self.conv.out_channels)
assert isinstance(self.InShape, tuple) and len(self.InShape) == 2, 'invalid in-shape : {:}'.format(self.InShape)
assert isinstance(self.OutShape, tuple) and len(self.OutShape) == 2, 'invalid out-shape : {:}'.format(self.OutShape)
#conv_per_position_flops = self.conv.kernel_size[0] * self.conv.kernel_size[1] * iC * oC / self.conv.groups
conv_per_position_flops = (self.conv.kernel_size[0] * self.conv.kernel_size[1] * 1.0 / self.conv.groups)
all_positions = self.OutShape[0] * self.OutShape[1]
flops = (conv_per_position_flops * all_positions / divide) * iC * oC
if self.conv.bias is not None: flops += all_positions / divide
return flops
def get_range(self):
return [self.choices]
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, index, prob = tuple_inputs
index, prob = torch.squeeze(index).tolist(), torch.squeeze(prob)
probability = torch.squeeze(probability)
assert len(index) == 2, 'invalid length : {:}'.format(index)
# compute expected flop
#coordinates = torch.arange(self.x_range[0], self.x_range[1]+1).type_as(probability)
expected_outC = (self.choices_tensor * probability).sum()
expected_flop = self.get_flops([expected_inC, expected_outC], False, 1e6)
if self.avg : out = self.avg( inputs )
else : out = inputs
# convolutional layer
out_convs = conv_forward(out, self.conv, [self.choices[i] for i in index])
out_bns = [self.BNs[idx](out_conv) for idx, out_conv in zip(index, out_convs)]
# merge
out_channel = max([x.size(1) for x in out_bns])
outA = ChannelWiseInter(out_bns[0], out_channel)
outB = ChannelWiseInter(out_bns[1], out_channel)
out = outA * prob[0] + outB * prob[1]
#out = additive_func(out_bns[0]*prob[0], out_bns[1]*prob[1])
if self.relu: out = self.relu( out )
else : out = out
return out, expected_outC, expected_flop
def basic_forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.has_bn: out = self.BNs[-1]( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
if self.InShape is None:
self.InShape = (inputs.size(-2), inputs.size(-1))
self.OutShape = (out.size(-2) , out.size(-1))
return out
class ResNetBasicblock(nn.Module):
expansion = 1
num_conv = 2
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes
self.search_mode = 'basic'
def get_range(self):
return self.conv_a.get_range() + self.conv_b.get_range()
def get_flops(self, channels):
assert len(channels) == 3, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_a.get_flops([channels[0], channels[1]])
flop_B = self.conv_b.get_flops([channels[1], channels[2]])
if hasattr(self.downsample, 'get_flops'):
flop_C = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_C = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_C = channels[0] * channels[-1] * self.conv_b.OutShape[0] * self.conv_b.OutShape[1]
return flop_A + flop_B + flop_C
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 2 and probs.size(0) == 2 and probability.size(0) == 2
out_a, expected_inC_a, expected_flop_a = self.conv_a( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_b, expected_inC_b, expected_flop_b = self.conv_b( (out_a , expected_inC_a, probability[1], indexes[1], probs[1]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[1], indexes[1], probs[1]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_b)
return nn.functional.relu(out, inplace=True), expected_inC_b, sum([expected_flop_a, expected_flop_b, expected_flop_c])
def basic_forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, basicblock)
return nn.functional.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes * self.expansion
self.search_mode = 'basic'
def get_range(self):
return self.conv_1x1.get_range() + self.conv_3x3.get_range() + self.conv_1x4.get_range()
def get_flops(self, channels):
assert len(channels) == 4, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_1x1.get_flops([channels[0], channels[1]])
flop_B = self.conv_3x3.get_flops([channels[1], channels[2]])
flop_C = self.conv_1x4.get_flops([channels[2], channels[3]])
if hasattr(self.downsample, 'get_flops'):
flop_D = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_D = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_D = channels[0] * channels[-1] * self.conv_1x4.OutShape[0] * self.conv_1x4.OutShape[1]
return flop_A + flop_B + flop_C + flop_D
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def basic_forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, bottleneck)
return nn.functional.relu(out, inplace=True)
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 3 and probs.size(0) == 3 and probability.size(0) == 3
out_1x1, expected_inC_1x1, expected_flop_1x1 = self.conv_1x1( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_3x3, expected_inC_3x3, expected_flop_3x3 = self.conv_3x3( (out_1x1,expected_inC_1x1, probability[1], indexes[1], probs[1]) )
out_1x4, expected_inC_1x4, expected_flop_1x4 = self.conv_1x4( (out_3x3,expected_inC_3x3, probability[2], indexes[2], probs[2]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[2], indexes[2], probs[2]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_1x4)
return nn.functional.relu(out, inplace=True), expected_inC_1x4, sum([expected_flop_1x1, expected_flop_3x3, expected_flop_1x4, expected_flop_c])
class SearchShapeCifarResNet(nn.Module):
def __init__(self, block_name, depth, num_classes):
super(SearchShapeCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be 9n+2, e.g., 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'SearchShapeCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.channels = [16]
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
self.InShape = None
self.depth_info = OrderedDict()
self.depth_at_i = OrderedDict()
for stage in range(3):
cur_block_choices = get_depth_choices(layer_blocks, False)
assert cur_block_choices[-1] == layer_blocks, 'stage={:}, {:} vs {:}'.format(stage, cur_block_choices, layer_blocks)
self.message += "\nstage={:} ::: depth-block-choices={:} for {:} blocks.".format(stage, cur_block_choices, layer_blocks)
block_choices, xstart = [], len(self.layers)
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
# added for depth
layer_index = len(self.layers) - 1
if iL + 1 in cur_block_choices: block_choices.append( layer_index )
if iL + 1 == layer_blocks:
self.depth_info[layer_index] = {'choices': block_choices,
'stage' : stage,
'xstart' : xstart}
self.depth_info_list = []
for xend, info in self.depth_info.items():
self.depth_info_list.append( (xend, info) )
xstart, xstage = info['xstart'], info['stage']
for ilayer in range(xstart, xend+1):
idx = bisect_right(info['choices'], ilayer-1)
self.depth_at_i[ilayer] = (xstage, idx)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(module.out_dim, num_classes)
self.InShape = None
self.tau = -1
self.search_mode = 'basic'
#assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
# parameters for width
self.Ranges = []
self.layer2indexRange = []
for i, layer in enumerate(self.layers):
start_index = len(self.Ranges)
self.Ranges += layer.get_range()
self.layer2indexRange.append( (start_index, len(self.Ranges)) )
assert len(self.Ranges) + 1 == depth, 'invalid depth check {:} vs {:}'.format(len(self.Ranges) + 1, depth)
self.register_parameter('width_attentions', nn.Parameter(torch.Tensor(len(self.Ranges), get_width_choices(None))))
self.register_parameter('depth_attentions', nn.Parameter(torch.Tensor(3, get_depth_choices(layer_blocks, True))))
nn.init.normal_(self.width_attentions, 0, 0.01)
nn.init.normal_(self.depth_attentions, 0, 0.01)
self.apply(initialize_resnet)
def arch_parameters(self, LR=None):
if LR is None:
return [self.width_attentions, self.depth_attentions]
else:
return [
{"params": self.width_attentions, "lr": LR},
{"params": self.depth_attentions, "lr": LR},
]
def base_parameters(self):
return list(self.layers.parameters()) + list(self.avgpool.parameters()) + list(self.classifier.parameters())
def get_flop(self, mode, config_dict, extra_info):
if config_dict is not None: config_dict = config_dict.copy()
# select channels
channels = [3]
for i, weight in enumerate(self.width_attentions):
if mode == 'genotype':
with torch.no_grad():
probe = nn.functional.softmax(weight, dim=0)
C = self.Ranges[i][ torch.argmax(probe).item() ]
elif mode == 'max':
C = self.Ranges[i][-1]
elif mode == 'fix':
C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
elif mode == 'random':
assert isinstance(extra_info, float), 'invalid extra_info : {:}'.format(extra_info)
with torch.no_grad():
prob = nn.functional.softmax(weight, dim=0)
approximate_C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
for j in range(prob.size(0)):
prob[j] = 1 / (abs(j - (approximate_C-self.Ranges[i][j])) + 0.2)
C = self.Ranges[i][ torch.multinomial(prob, 1, False).item() ]
else:
raise ValueError('invalid mode : {:}'.format(mode))
channels.append( C )
# select depth
if mode == 'genotype':
with torch.no_grad():
depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
choices = torch.argmax(depth_probs, dim=1).cpu().tolist()
elif mode == 'max' or mode == 'fix':
choices = [self.depth_attentions.size(1)-1 for _ in range(self.depth_attentions.size(0))]
elif mode == 'random':
with torch.no_grad():
depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
choices = torch.multinomial(depth_probs, 1, False).cpu().tolist()
else:
raise ValueError('invalid mode : {:}'.format(mode))
selected_layers = []
for choice, xvalue in zip(choices, self.depth_info_list):
xtemp = xvalue[1]['choices'][choice] - xvalue[1]['xstart'] + 1
selected_layers.append(xtemp)
flop = 0
for i, layer in enumerate(self.layers):
s, e = self.layer2indexRange[i]
xchl = tuple( channels[s:e+1] )
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
if xatti <= choices[xstagei]: # leave this depth
flop+= layer.get_flops(xchl)
else:
flop+= 0 # do not use this layer
else:
flop+= layer.get_flops(xchl)
# the last fc layer
flop += channels[-1] * self.classifier.out_features
if config_dict is None:
return flop / 1e6
else:
config_dict['xchannels'] = channels
config_dict['xblocks'] = selected_layers
config_dict['super_type'] = 'infer-shape'
config_dict['estimated_FLOP'] = flop / 1e6
return flop / 1e6, config_dict
def get_arch_info(self):
string = "for depth and width, there are {:} + {:} attention probabilities.".format(len(self.depth_attentions), len(self.width_attentions))
string+= '\n{:}'.format(self.depth_info)
discrepancy = []
with torch.no_grad():
for i, att in enumerate(self.depth_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.depth_attentions), ' '.join(prob))
logt = ['{:.4f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:17s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || discrepancy={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
string += '\n-----------------------------------------------'
for i, att in enumerate(self.width_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.width_attentions), ' '.join(prob))
logt = ['{:.3f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:52s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || dis={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
return string, discrepancy
def set_tau(self, tau_max, tau_min, epoch_ratio):
assert epoch_ratio >= 0 and epoch_ratio <= 1, 'invalid epoch-ratio : {:}'.format(epoch_ratio)
tau = tau_min + (tau_max-tau_min) * (1 + math.cos(math.pi * epoch_ratio)) / 2
self.tau = tau
def get_message(self):
return self.message
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, inputs):
flop_width_probs = nn.functional.softmax(self.width_attentions, dim=1)
flop_depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
flop_depth_probs = torch.flip( torch.cumsum( torch.flip(flop_depth_probs, [1]), 1 ), [1] )
selected_widths, selected_width_probs = select2withP(self.width_attentions, self.tau)
selected_depth_probs = select2withP(self.depth_attentions, self.tau, True)
with torch.no_grad():
selected_widths = selected_widths.cpu()
x, last_channel_idx, expected_inC, flops = inputs, 0, 3, []
feature_maps = []
for i, layer in enumerate(self.layers):
selected_w_index = selected_widths [last_channel_idx: last_channel_idx+layer.num_conv]
selected_w_probs = selected_width_probs[last_channel_idx: last_channel_idx+layer.num_conv]
layer_prob = flop_width_probs [last_channel_idx: last_channel_idx+layer.num_conv]
x, expected_inC, expected_flop = layer( (x, expected_inC, layer_prob, selected_w_index, selected_w_probs) )
feature_maps.append( x )
last_channel_idx += layer.num_conv
if i in self.depth_info: # aggregate the information
choices = self.depth_info[i]['choices']
xstagei = self.depth_info[i]['stage']
#print ('iL={:}, choices={:}, stage={:}, probs={:}'.format(i, choices, xstagei, selected_depth_probs[xstagei].cpu().tolist()))
#for A, W in zip(choices, selected_depth_probs[xstagei]):
# print('Size = {:}, W = {:}'.format(feature_maps[A].size(), W))
possible_tensors = []
max_C = max( feature_maps[A].size(1) for A in choices )
for tempi, A in enumerate(choices):
xtensor = ChannelWiseInter(feature_maps[A], max_C)
#drop_ratio = 1-(tempi+1.0)/len(choices)
#xtensor = drop_path(xtensor, drop_ratio)
possible_tensors.append( xtensor )
weighted_sum = sum( xtensor * W for xtensor, W in zip(possible_tensors, selected_depth_probs[xstagei]) )
x = weighted_sum
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
x_expected_flop = flop_depth_probs[xstagei, xatti] * expected_flop
else:
x_expected_flop = expected_flop
flops.append( x_expected_flop )
flops.append( expected_inC * (self.classifier.out_features*1.0/1e6) )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = linear_forward(features, self.classifier)
return logits, torch.stack( [sum(flops)] )
def basic_forward(self, inputs):
if self.InShape is None: self.InShape = (inputs.size(-2), inputs.size(-1))
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits
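# Workflow sketch (illustrative): in 'basic' search_mode the model runs as a plain ResNet; for the
# joint width/depth search one would switch every module to search_mode='search', anneal the
# sampling temperature with set_tau(tau_max, tau_min, epoch_ratio), and call forward(inputs) to get
# (logits, expected-MFLOPs); get_flop('genotype', config_dict, None) then reads out a discrete
# 'infer-shape' configuration.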

View File

@@ -0,0 +1,340 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
from collections import OrderedDict
from bisect import bisect_right
import torch.nn as nn
from ..initialization import initialize_resnet
from ..SharedUtils import additive_func
from .SoftSelect import select2withP, ChannelWiseInter
from .SoftSelect import linear_forward
from .SoftSelect import get_width_choices
def get_depth_choices(nDepth, return_num):
if nDepth == 2:
choices = (1, 2)
elif nDepth == 3:
choices = (1, 2, 3)
elif nDepth > 3:
choices = list(range(1, nDepth+1, 2))
if choices[-1] < nDepth: choices.append(nDepth)
else:
raise ValueError('invalid nDepth : {:}'.format(nDepth))
if return_num: return len(choices)
else : return choices
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
self.InShape = None
self.OutShape = None
self.choices = get_width_choices(nOut)
self.register_buffer('choices_tensor', torch.Tensor( self.choices ))
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
if has_bn : self.bn = nn.BatchNorm2d(nOut)
else : self.bn = None
if has_relu: self.relu = nn.ReLU(inplace=False)
else : self.relu = None
self.in_dim = nIn
self.out_dim = nOut
def get_flops(self, divide=1):
iC, oC = self.in_dim, self.out_dim
assert iC <= self.conv.in_channels and oC <= self.conv.out_channels, '{:} vs {:} | {:} vs {:}'.format(iC, self.conv.in_channels, oC, self.conv.out_channels)
assert isinstance(self.InShape, tuple) and len(self.InShape) == 2, 'invalid in-shape : {:}'.format(self.InShape)
assert isinstance(self.OutShape, tuple) and len(self.OutShape) == 2, 'invalid out-shape : {:}'.format(self.OutShape)
#conv_per_position_flops = self.conv.kernel_size[0] * self.conv.kernel_size[1] * iC * oC / self.conv.groups
conv_per_position_flops = (self.conv.kernel_size[0] * self.conv.kernel_size[1] * 1.0 / self.conv.groups)
all_positions = self.OutShape[0] * self.OutShape[1]
flops = (conv_per_position_flops * all_positions / divide) * iC * oC
if self.conv.bias is not None: flops += all_positions / divide
return flops
def forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.bn : out = self.bn( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
if self.InShape is None:
self.InShape = (inputs.size(-2), inputs.size(-1))
self.OutShape = (out.size(-2) , out.size(-1))
return out
class ResNetBasicblock(nn.Module):
expansion = 1
num_conv = 2
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes
self.search_mode = 'basic'
def get_flops(self, divide=1):
flop_A = self.conv_a.get_flops(divide)
flop_B = self.conv_b.get_flops(divide)
if hasattr(self.downsample, 'get_flops'):
flop_C = self.downsample.get_flops(divide)
else:
flop_C = 0
return flop_A + flop_B + flop_C
def forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, basicblock)
return nn.functional.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes * self.expansion
self.search_mode = 'basic'
def get_range(self):
return self.conv_1x1.get_range() + self.conv_3x3.get_range() + self.conv_1x4.get_range()
def get_flops(self, divide):
flop_A = self.conv_1x1.get_flops(divide)
flop_B = self.conv_3x3.get_flops(divide)
flop_C = self.conv_1x4.get_flops(divide)
if hasattr(self.downsample, 'get_flops'):
flop_D = self.downsample.get_flops(divide)
else:
flop_D = 0
return flop_A + flop_B + flop_C + flop_D
def forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, bottleneck)
return nn.functional.relu(out, inplace=True)
class SearchDepthCifarResNet(nn.Module):
def __init__(self, block_name, depth, num_classes):
super(SearchDepthCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be 9n+2, e.g., 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'SearchDepthCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.channels = [16]
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
self.InShape = None
self.depth_info = OrderedDict()
self.depth_at_i = OrderedDict()
for stage in range(3):
cur_block_choices = get_depth_choices(layer_blocks, False)
assert cur_block_choices[-1] == layer_blocks, 'stage={:}, {:} vs {:}'.format(stage, cur_block_choices, layer_blocks)
self.message += "\nstage={:} ::: depth-block-choices={:} for {:} blocks.".format(stage, cur_block_choices, layer_blocks)
block_choices, xstart = [], len(self.layers)
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
# added for depth
layer_index = len(self.layers) - 1
if iL + 1 in cur_block_choices: block_choices.append( layer_index )
if iL + 1 == layer_blocks:
self.depth_info[layer_index] = {'choices': block_choices,
'stage' : stage,
'xstart' : xstart}
self.depth_info_list = []
for xend, info in self.depth_info.items():
self.depth_info_list.append( (xend, info) )
xstart, xstage = info['xstart'], info['stage']
for ilayer in range(xstart, xend+1):
idx = bisect_right(info['choices'], ilayer-1)
self.depth_at_i[ilayer] = (xstage, idx)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(module.out_dim, num_classes)
self.InShape = None
self.tau = -1
self.search_mode = 'basic'
#assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
self.register_parameter('depth_attentions', nn.Parameter(torch.Tensor(3, get_depth_choices(layer_blocks, True))))
nn.init.normal_(self.depth_attentions, 0, 0.01)
self.apply(initialize_resnet)
def arch_parameters(self):
return [self.depth_attentions]
def base_parameters(self):
return list(self.layers.parameters()) + list(self.avgpool.parameters()) + list(self.classifier.parameters())
def get_flop(self, mode, config_dict, extra_info):
if config_dict is not None: config_dict = config_dict.copy()
# select depth
if mode == 'genotype':
with torch.no_grad():
depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
choices = torch.argmax(depth_probs, dim=1).cpu().tolist()
elif mode == 'max':
choices = [self.depth_attentions.size(1)-1 for _ in range(self.depth_attentions.size(0))]
elif mode == 'random':
with torch.no_grad():
depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
choices = torch.multinomial(depth_probs, 1, False).cpu().tolist()
else:
raise ValueError('invalid mode : {:}'.format(mode))
selected_layers = []
for choice, xvalue in zip(choices, self.depth_info_list):
xtemp = xvalue[1]['choices'][choice] - xvalue[1]['xstart'] + 1
selected_layers.append(xtemp)
flop = 0
for i, layer in enumerate(self.layers):
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
if xatti <= choices[xstagei]: # leave this depth
flop+= layer.get_flops()
else:
flop+= 0 # do not use this layer
else:
flop+= layer.get_flops()
# the last fc layer
flop += self.classifier.in_features * self.classifier.out_features
if config_dict is None:
return flop / 1e6
else:
config_dict['xblocks'] = selected_layers
config_dict['super_type'] = 'infer-depth'
config_dict['estimated_FLOP'] = flop / 1e6
return flop / 1e6, config_dict
def get_arch_info(self):
string = "for depth, there are {:} attention probabilities.".format(len(self.depth_attentions))
string+= '\n{:}'.format(self.depth_info)
discrepancy = []
with torch.no_grad():
for i, att in enumerate(self.depth_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.depth_attentions), ' '.join(prob))
logt = ['{:.4f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:17s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || discrepancy={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
return string, discrepancy
def set_tau(self, tau_max, tau_min, epoch_ratio):
assert epoch_ratio >= 0 and epoch_ratio <= 1, 'invalid epoch-ratio : {:}'.format(epoch_ratio)
tau = tau_min + (tau_max-tau_min) * (1 + math.cos(math.pi * epoch_ratio)) / 2
self.tau = tau
def get_message(self):
return self.message
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, inputs):
flop_depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
flop_depth_probs = torch.flip( torch.cumsum( torch.flip(flop_depth_probs, [1]), 1 ), [1] )
selected_depth_probs = select2withP(self.depth_attentions, self.tau, True)
x, flops = inputs, []
feature_maps = []
for i, layer in enumerate(self.layers):
layer_i = layer( x )
feature_maps.append( layer_i )
if i in self.depth_info: # aggregate the information
choices = self.depth_info[i]['choices']
xstagei = self.depth_info[i]['stage']
possible_tensors = []
for tempi, A in enumerate(choices):
xtensor = feature_maps[A]
possible_tensors.append( xtensor )
weighted_sum = sum( xtensor * W for xtensor, W in zip(possible_tensors, selected_depth_probs[xstagei]) )
x = weighted_sum
else:
x = layer_i
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
#print ('layer-{:03d}, stage={:}, att={:}, prob={:}, flop={:}'.format(i, xstagei, xatti, flop_depth_probs[xstagei, xatti].item(), layer.get_flops(1e6)))
x_expected_flop = flop_depth_probs[xstagei, xatti] * layer.get_flops(1e6)
else:
x_expected_flop = layer.get_flops(1e6)
flops.append( x_expected_flop )
flops.append( (self.classifier.in_features * self.classifier.out_features*1.0/1e6) )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = linear_forward(features, self.classifier)
return logits, torch.stack( [sum(flops)] )
def basic_forward(self, inputs):
if self.InShape is None: self.InShape = (inputs.size(-2), inputs.size(-1))
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits
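# Note (illustrative): this variant searches only the per-stage depth; channel widths stay fixed,
# so get_flop(...) sums each layer's full-width FLOPs and emits an 'infer-depth' configuration.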

View File

@@ -0,0 +1,393 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
from ..initialization import initialize_resnet
from ..SharedUtils import additive_func
from .SoftSelect import select2withP, ChannelWiseInter
from .SoftSelect import linear_forward
from .SoftSelect import get_width_choices as get_choices
def conv_forward(inputs, conv, choices):
iC = conv.in_channels
fill_size = list(inputs.size())
fill_size[1] = iC - fill_size[1]
filled = torch.zeros(fill_size, device=inputs.device)
xinputs = torch.cat((inputs, filled), dim=1)
outputs = conv(xinputs)
selecteds = [outputs[:,:oC] for oC in choices]
return selecteds
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
self.InShape = None
self.OutShape = None
self.choices = get_choices(nOut)
self.register_buffer('choices_tensor', torch.Tensor( self.choices ))
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
#if has_bn : self.bn = nn.BatchNorm2d(nOut)
#else : self.bn = None
self.has_bn = has_bn
self.BNs = nn.ModuleList()
for i, _out in enumerate(self.choices):
self.BNs.append(nn.BatchNorm2d(_out))
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
self.in_dim = nIn
self.out_dim = nOut
self.search_mode = 'basic'
def get_flops(self, channels, check_range=True, divide=1):
iC, oC = channels
if check_range: assert iC <= self.conv.in_channels and oC <= self.conv.out_channels, '{:} vs {:} | {:} vs {:}'.format(iC, self.conv.in_channels, oC, self.conv.out_channels)
assert isinstance(self.InShape, tuple) and len(self.InShape) == 2, 'invalid in-shape : {:}'.format(self.InShape)
assert isinstance(self.OutShape, tuple) and len(self.OutShape) == 2, 'invalid out-shape : {:}'.format(self.OutShape)
#conv_per_position_flops = self.conv.kernel_size[0] * self.conv.kernel_size[1] * iC * oC / self.conv.groups
conv_per_position_flops = (self.conv.kernel_size[0] * self.conv.kernel_size[1] * 1.0 / self.conv.groups)
all_positions = self.OutShape[0] * self.OutShape[1]
flops = (conv_per_position_flops * all_positions / divide) * iC * oC
if self.conv.bias is not None: flops += all_positions / divide
return flops
def get_range(self):
return [self.choices]
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, index, prob = tuple_inputs
index, prob = torch.squeeze(index).tolist(), torch.squeeze(prob)
probability = torch.squeeze(probability)
assert len(index) == 2, 'invalid length : {:}'.format(index)
# compute expected flop
#coordinates = torch.arange(self.x_range[0], self.x_range[1]+1).type_as(probability)
expected_outC = (self.choices_tensor * probability).sum()
expected_flop = self.get_flops([expected_inC, expected_outC], False, 1e6)
if self.avg : out = self.avg( inputs )
else : out = inputs
# convolutional layer
out_convs = conv_forward(out, self.conv, [self.choices[i] for i in index])
out_bns = [self.BNs[idx](out_conv) for idx, out_conv in zip(index, out_convs)]
# merge
out_channel = max([x.size(1) for x in out_bns])
outA = ChannelWiseInter(out_bns[0], out_channel)
outB = ChannelWiseInter(out_bns[1], out_channel)
out = outA * prob[0] + outB * prob[1]
#out = additive_func(out_bns[0]*prob[0], out_bns[1]*prob[1])
if self.relu: out = self.relu( out )
else : out = out
return out, expected_outC, expected_flop
def basic_forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.has_bn: out = self.BNs[-1]( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
if self.InShape is None:
self.InShape = (inputs.size(-2), inputs.size(-1))
self.OutShape = (out.size(-2) , out.size(-1))
return out
class ResNetBasicblock(nn.Module):
expansion = 1
num_conv = 2
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes
self.search_mode = 'basic'
def get_range(self):
return self.conv_a.get_range() + self.conv_b.get_range()
def get_flops(self, channels):
assert len(channels) == 3, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_a.get_flops([channels[0], channels[1]])
flop_B = self.conv_b.get_flops([channels[1], channels[2]])
if hasattr(self.downsample, 'get_flops'):
flop_C = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_C = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_C = channels[0] * channels[-1] * self.conv_b.OutShape[0] * self.conv_b.OutShape[1]
return flop_A + flop_B + flop_C
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 2 and probs.size(0) == 2 and probability.size(0) == 2
out_a, expected_inC_a, expected_flop_a = self.conv_a( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_b, expected_inC_b, expected_flop_b = self.conv_b( (out_a , expected_inC_a, probability[1], indexes[1], probs[1]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[1], indexes[1], probs[1]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_b)
return nn.functional.relu(out, inplace=True), expected_inC_b, sum([expected_flop_a, expected_flop_b, expected_flop_c])
def basic_forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, basicblock)
return nn.functional.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes * self.expansion
self.search_mode = 'basic'
def get_range(self):
return self.conv_1x1.get_range() + self.conv_3x3.get_range() + self.conv_1x4.get_range()
def get_flops(self, channels):
assert len(channels) == 4, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_1x1.get_flops([channels[0], channels[1]])
flop_B = self.conv_3x3.get_flops([channels[1], channels[2]])
flop_C = self.conv_1x4.get_flops([channels[2], channels[3]])
if hasattr(self.downsample, 'get_flops'):
flop_D = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_D = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_D = channels[0] * channels[-1] * self.conv_1x4.OutShape[0] * self.conv_1x4.OutShape[1]
return flop_A + flop_B + flop_C + flop_D
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def basic_forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, bottleneck)
return nn.functional.relu(out, inplace=True)
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 3 and probs.size(0) == 3 and probability.size(0) == 3
out_1x1, expected_inC_1x1, expected_flop_1x1 = self.conv_1x1( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_3x3, expected_inC_3x3, expected_flop_3x3 = self.conv_3x3( (out_1x1,expected_inC_1x1, probability[1], indexes[1], probs[1]) )
out_1x4, expected_inC_1x4, expected_flop_1x4 = self.conv_1x4( (out_3x3,expected_inC_3x3, probability[2], indexes[2], probs[2]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[2], indexes[2], probs[2]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_1x4)
return nn.functional.relu(out, inplace=True), expected_inC_1x4, sum([expected_flop_1x1, expected_flop_3x3, expected_flop_1x4, expected_flop_c])
class SearchWidthCifarResNet(nn.Module):
def __init__(self, block_name, depth, num_classes):
super(SearchWidthCifarResNet, self).__init__()
#Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
if block_name == 'ResNetBasicblock':
block = ResNetBasicblock
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
layer_blocks = (depth - 2) // 6
elif block_name == 'ResNetBottleneck':
block = ResNetBottleneck
assert (depth - 2) % 9 == 0, 'depth should be one of 164'
layer_blocks = (depth - 2) // 9
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'SearchWidthCifarResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.channels = [16]
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
self.InShape = None
for stage in range(3):
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(module.out_dim, num_classes)
self.InShape = None
self.tau = -1
self.search_mode = 'basic'
#assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
# parameters for width
self.Ranges = []
self.layer2indexRange = []
for i, layer in enumerate(self.layers):
start_index = len(self.Ranges)
self.Ranges += layer.get_range()
self.layer2indexRange.append( (start_index, len(self.Ranges)) )
assert len(self.Ranges) + 1 == depth, 'invalid depth check {:} vs {:}'.format(len(self.Ranges) + 1, depth)
self.register_parameter('width_attentions', nn.Parameter(torch.Tensor(len(self.Ranges), get_choices(None))))
nn.init.normal_(self.width_attentions, 0, 0.01)
self.apply(initialize_resnet)
def arch_parameters(self):
return [self.width_attentions]
def base_parameters(self):
return list(self.layers.parameters()) + list(self.avgpool.parameters()) + list(self.classifier.parameters())
def get_flop(self, mode, config_dict, extra_info):
if config_dict is not None: config_dict = config_dict.copy()
#weights = [F.softmax(x, dim=0) for x in self.width_attentions]
channels = [3]
for i, weight in enumerate(self.width_attentions):
if mode == 'genotype':
with torch.no_grad():
probe = nn.functional.softmax(weight, dim=0)
C = self.Ranges[i][ torch.argmax(probe).item() ]
elif mode == 'max':
C = self.Ranges[i][-1]
elif mode == 'fix':
C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
elif mode == 'random':
assert isinstance(extra_info, float), 'invalid extra_info : {:}'.format(extra_info)
with torch.no_grad():
prob = nn.functional.softmax(weight, dim=0)
approximate_C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
for j in range(prob.size(0)):
prob[j] = 1 / (abs(j - (approximate_C-self.Ranges[i][j])) + 0.2)
C = self.Ranges[i][ torch.multinomial(prob, 1, False).item() ]
else:
raise ValueError('invalid mode : {:}'.format(mode))
channels.append( C )
flop = 0
for i, layer in enumerate(self.layers):
s, e = self.layer2indexRange[i]
xchl = tuple( channels[s:e+1] )
flop+= layer.get_flops(xchl)
# the last fc layer
flop += channels[-1] * self.classifier.out_features
if config_dict is None:
return flop / 1e6
else:
config_dict['xchannels'] = channels
config_dict['super_type'] = 'infer-width'
config_dict['estimated_FLOP'] = flop / 1e6
return flop / 1e6, config_dict
def get_arch_info(self):
string = "for width, there are {:} attention probabilities.".format(len(self.width_attentions))
discrepancy = []
with torch.no_grad():
for i, att in enumerate(self.width_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.width_attentions), ' '.join(prob))
logt = ['{:.3f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:52s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || dis={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
return string, discrepancy
def set_tau(self, tau_max, tau_min, epoch_ratio):
assert epoch_ratio >= 0 and epoch_ratio <= 1, 'invalid epoch-ratio : {:}'.format(epoch_ratio)
tau = tau_min + (tau_max-tau_min) * (1 + math.cos(math.pi * epoch_ratio)) / 2
self.tau = tau
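# Illustrative note (not in the original file): set_tau follows a cosine
# annealing schedule over training, e.g. with tau_max=10 and tau_min=0.1
#   epoch_ratio=0.0 -> tau = 10.0 (soft, near-uniform Gumbel-softmax samples)
#   epoch_ratio=0.5 -> tau = 5.05 (midpoint of the two temperatures)
#   epoch_ratio=1.0 -> tau = 0.1  (sharp, near one-hot width selections)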
def get_message(self):
return self.message
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, inputs):
flop_probs = nn.functional.softmax(self.width_attentions, dim=1)
selected_widths, selected_probs = select2withP(self.width_attentions, self.tau)
with torch.no_grad():
selected_widths = selected_widths.cpu()
x, last_channel_idx, expected_inC, flops = inputs, 0, 3, []
for i, layer in enumerate(self.layers):
selected_w_index = selected_widths[last_channel_idx: last_channel_idx+layer.num_conv]
selected_w_probs = selected_probs[last_channel_idx: last_channel_idx+layer.num_conv]
layer_prob = flop_probs[last_channel_idx: last_channel_idx+layer.num_conv]
x, expected_inC, expected_flop = layer( (x, expected_inC, layer_prob, selected_w_index, selected_w_probs) )
last_channel_idx += layer.num_conv
flops.append( expected_flop )
flops.append( expected_inC * (self.classifier.out_features*1.0/1e6) )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = linear_forward(features, self.classifier)
return logits, torch.stack( [sum(flops)] )
def basic_forward(self, inputs):
if self.InShape is None: self.InShape = (inputs.size(-2), inputs.size(-1))
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits

View File

@@ -0,0 +1,482 @@
import math, torch
from collections import OrderedDict
from bisect import bisect_right
import torch.nn as nn
from ..initialization import initialize_resnet
from ..SharedUtils import additive_func
from .SoftSelect import select2withP, ChannelWiseInter
from .SoftSelect import linear_forward
from .SoftSelect import get_width_choices
def get_depth_choices(layers):
min_depth = min(layers)
info = {'num': min_depth}
for i, depth in enumerate(layers):
choices = []
for j in range(1, min_depth+1):
choices.append( int( float(depth)*j/min_depth ) )
info[i] = choices
return info
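# Illustrative example (not in the original file): for a typical ImageNet
# ResNet with layers=[3, 4, 6, 3], min_depth is 3, so every stage exposes
# three depth choices scaled to its own block count:
#   get_depth_choices([3, 4, 6, 3])
#   -> {'num': 3, 0: [1, 2, 3], 1: [1, 2, 4], 2: [2, 4, 6], 3: [1, 2, 3]}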
def conv_forward(inputs, conv, choices):
iC = conv.in_channels
fill_size = list(inputs.size())
fill_size[1] = iC - fill_size[1]
filled = torch.zeros(fill_size, device=inputs.device)
xinputs = torch.cat((inputs, filled), dim=1)
outputs = conv(xinputs)
selecteds = [outputs[:,:oC] for oC in choices]
return selecteds
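# Illustrative note (not in the original file): conv_forward zero-pads the
# (possibly narrower) input along the channel dimension up to conv.in_channels,
# runs the full-width convolution once, and slices the result to every
# candidate width in `choices`, so all width options share one conv pass, e.g.
#   conv_forward(x, conv, [32, 48])  # returns [out[:, :32], out[:, :48]]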
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu, last_max_pool=False):
super(ConvBNReLU, self).__init__()
self.InShape = None
self.OutShape = None
self.choices = get_width_choices(nOut)
self.register_buffer('choices_tensor', torch.Tensor( self.choices ))
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
#if has_bn : self.bn = nn.BatchNorm2d(nOut)
#else : self.bn = None
self.has_bn = has_bn
self.BNs = nn.ModuleList()
for i, _out in enumerate(self.choices):
self.BNs.append(nn.BatchNorm2d(_out))
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
if last_max_pool: self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
else : self.maxpool = None
self.in_dim = nIn
self.out_dim = nOut
self.search_mode = 'basic'
def get_flops(self, channels, check_range=True, divide=1):
iC, oC = channels
if check_range: assert iC <= self.conv.in_channels and oC <= self.conv.out_channels, '{:} vs {:} | {:} vs {:}'.format(iC, self.conv.in_channels, oC, self.conv.out_channels)
assert isinstance(self.InShape, tuple) and len(self.InShape) == 2, 'invalid in-shape : {:}'.format(self.InShape)
assert isinstance(self.OutShape, tuple) and len(self.OutShape) == 2, 'invalid out-shape : {:}'.format(self.OutShape)
#conv_per_position_flops = self.conv.kernel_size[0] * self.conv.kernel_size[1] * iC * oC / self.conv.groups
conv_per_position_flops = (self.conv.kernel_size[0] * self.conv.kernel_size[1] * 1.0 / self.conv.groups)
all_positions = self.OutShape[0] * self.OutShape[1]
flops = (conv_per_position_flops * all_positions / divide) * iC * oC
if self.conv.bias is not None: flops += all_positions / divide
return flops
def get_range(self):
return [self.choices]
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, index, prob = tuple_inputs
index, prob = torch.squeeze(index).tolist(), torch.squeeze(prob)
probability = torch.squeeze(probability)
assert len(index) == 2, 'invalid length : {:}'.format(index)
# compute expected flop
#coordinates = torch.arange(self.x_range[0], self.x_range[1]+1).type_as(probability)
expected_outC = (self.choices_tensor * probability).sum()
expected_flop = self.get_flops([expected_inC, expected_outC], False, 1e6)
if self.avg : out = self.avg( inputs )
else : out = inputs
# convolutional layer
out_convs = conv_forward(out, self.conv, [self.choices[i] for i in index])
out_bns = [self.BNs[idx](out_conv) for idx, out_conv in zip(index, out_convs)]
# merge
out_channel = max([x.size(1) for x in out_bns])
outA = ChannelWiseInter(out_bns[0], out_channel)
outB = ChannelWiseInter(out_bns[1], out_channel)
out = outA * prob[0] + outB * prob[1]
#out = additive_func(out_bns[0]*prob[0], out_bns[1]*prob[1])
if self.relu : out = self.relu( out )
if self.maxpool: out = self.maxpool(out)
return out, expected_outC, expected_flop
def basic_forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.has_bn:out= self.BNs[-1]( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
if self.InShape is None:
self.InShape = (inputs.size(-2), inputs.size(-1))
self.OutShape = (out.size(-2) , out.size(-1))
if self.maxpool: out = self.maxpool(out)
return out
class ResNetBasicblock(nn.Module):
expansion = 1
num_conv = 2
def __init__(self, inplanes, planes, stride):
super(ResNetBasicblock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_a = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_b = ConvBNReLU( planes, planes, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=True, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True, has_relu=False)
else:
self.downsample = None
self.out_dim = planes
self.search_mode = 'basic'
def get_range(self):
return self.conv_a.get_range() + self.conv_b.get_range()
def get_flops(self, channels):
assert len(channels) == 3, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_a.get_flops([channels[0], channels[1]])
flop_B = self.conv_b.get_flops([channels[1], channels[2]])
if hasattr(self.downsample, 'get_flops'):
flop_C = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_C = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_C = channels[0] * channels[-1] * self.conv_b.OutShape[0] * self.conv_b.OutShape[1]
return flop_A + flop_B + flop_C
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 2 and probs.size(0) == 2 and probability.size(0) == 2
#import pdb; pdb.set_trace()
out_a, expected_inC_a, expected_flop_a = self.conv_a( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_b, expected_inC_b, expected_flop_b = self.conv_b( (out_a , expected_inC_a, probability[1], indexes[1], probs[1]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[1], indexes[1], probs[1]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_b)
return nn.functional.relu(out, inplace=True), expected_inC_b, sum([expected_flop_a, expected_flop_b, expected_flop_c])
def basic_forward(self, inputs):
basicblock = self.conv_a(inputs)
basicblock = self.conv_b(basicblock)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, basicblock)
return nn.functional.relu(out, inplace=True)
class ResNetBottleneck(nn.Module):
expansion = 4
num_conv = 3
def __init__(self, inplanes, planes, stride):
super(ResNetBottleneck, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv_1x1 = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_3x3 = ConvBNReLU( planes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
self.conv_1x4 = ConvBNReLU(planes, planes*self.expansion, 1, 1, 0, False, has_avg=False, has_bn=True, has_relu=False)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=True, has_bn=True, has_relu=False)
elif inplanes != planes*self.expansion:
self.downsample = ConvBNReLU(inplanes, planes*self.expansion, 1, 1, 0, False, has_avg=False,has_bn=True, has_relu=False)
else:
self.downsample = None
self.out_dim = planes * self.expansion
self.search_mode = 'basic'
def get_range(self):
return self.conv_1x1.get_range() + self.conv_3x3.get_range() + self.conv_1x4.get_range()
def get_flops(self, channels):
assert len(channels) == 4, 'invalid channels : {:}'.format(channels)
flop_A = self.conv_1x1.get_flops([channels[0], channels[1]])
flop_B = self.conv_3x3.get_flops([channels[1], channels[2]])
flop_C = self.conv_1x4.get_flops([channels[2], channels[3]])
if hasattr(self.downsample, 'get_flops'):
flop_D = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_D = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_D = channels[0] * channels[-1] * self.conv_1x4.OutShape[0] * self.conv_1x4.OutShape[1]
return flop_A + flop_B + flop_C + flop_D
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def basic_forward(self, inputs):
bottleneck = self.conv_1x1(inputs)
bottleneck = self.conv_3x3(bottleneck)
bottleneck = self.conv_1x4(bottleneck)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, bottleneck)
return nn.functional.relu(out, inplace=True)
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 3 and probs.size(0) == 3 and probability.size(0) == 3
out_1x1, expected_inC_1x1, expected_flop_1x1 = self.conv_1x1( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
out_3x3, expected_inC_3x3, expected_flop_3x3 = self.conv_3x3( (out_1x1,expected_inC_1x1, probability[1], indexes[1], probs[1]) )
out_1x4, expected_inC_1x4, expected_flop_1x4 = self.conv_1x4( (out_3x3,expected_inC_3x3, probability[2], indexes[2], probs[2]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[2], indexes[2], probs[2]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out_1x4)
return nn.functional.relu(out, inplace=True), expected_inC_1x4, sum([expected_flop_1x1, expected_flop_3x3, expected_flop_1x4, expected_flop_c])
class SearchShapeImagenetResNet(nn.Module):
def __init__(self, block_name, layers, deep_stem, num_classes):
super(SearchShapeImagenetResNet, self).__init__()
#Model type specifies number of layers for the ImageNet model
if block_name == 'BasicBlock':
block = ResNetBasicblock
elif block_name == 'Bottleneck':
block = ResNetBottleneck
else:
raise ValueError('invalid block : {:}'.format(block_name))
self.message = 'SearchShapeImagenetResNet : Depth : {:} , Layers for each block : {:}'.format(sum(layers)*block.num_conv, layers)
self.num_classes = num_classes
if not deep_stem:
self.layers = nn.ModuleList( [ ConvBNReLU(3, 64, 7, 2, 3, False, has_avg=False, has_bn=True, has_relu=True, last_max_pool=True) ] )
self.channels = [64]
else:
self.layers = nn.ModuleList( [ ConvBNReLU(3, 32, 3, 2, 1, False, has_avg=False, has_bn=True, has_relu=True)
,ConvBNReLU(32,64, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True, last_max_pool=True) ] )
self.channels = [32, 64]
meta_depth_info = get_depth_choices(layers)
self.InShape = None
self.depth_info = OrderedDict()
self.depth_at_i = OrderedDict()
for stage, layer_blocks in enumerate(layers):
cur_block_choices = meta_depth_info[stage]
assert cur_block_choices[-1] == layer_blocks, 'stage={:}, {:} vs {:}'.format(stage, cur_block_choices, layer_blocks)
block_choices, xstart = [], len(self.layers)
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 64 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = block(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
# added for depth
layer_index = len(self.layers) - 1
if iL + 1 in cur_block_choices: block_choices.append( layer_index )
if iL + 1 == layer_blocks:
self.depth_info[layer_index] = {'choices': block_choices,
'stage' : stage,
'xstart' : xstart}
self.depth_info_list = []
for xend, info in self.depth_info.items():
self.depth_info_list.append( (xend, info) )
xstart, xstage = info['xstart'], info['stage']
for ilayer in range(xstart, xend+1):
idx = bisect_right(info['choices'], ilayer-1)
self.depth_at_i[ilayer] = (xstage, idx)
self.avgpool = nn.AdaptiveAvgPool2d((1,1))
self.classifier = nn.Linear(module.out_dim, num_classes)
self.InShape = None
self.tau = -1
self.search_mode = 'basic'
#assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
# parameters for width
self.Ranges = []
self.layer2indexRange = []
for i, layer in enumerate(self.layers):
start_index = len(self.Ranges)
self.Ranges += layer.get_range()
self.layer2indexRange.append( (start_index, len(self.Ranges)) )
self.register_parameter('width_attentions', nn.Parameter(torch.Tensor(len(self.Ranges), get_width_choices(None))))
self.register_parameter('depth_attentions', nn.Parameter(torch.Tensor(len(layers), meta_depth_info['num'])))
nn.init.normal_(self.width_attentions, 0, 0.01)
nn.init.normal_(self.depth_attentions, 0, 0.01)
self.apply(initialize_resnet)
def arch_parameters(self, LR=None):
if LR is None:
return [self.width_attentions, self.depth_attentions]
else:
return [
{"params": self.width_attentions, "lr": LR},
{"params": self.depth_attentions, "lr": LR},
]
def base_parameters(self):
return list(self.layers.parameters()) + list(self.avgpool.parameters()) + list(self.classifier.parameters())
def get_flop(self, mode, config_dict, extra_info):
if config_dict is not None: config_dict = config_dict.copy()
# select channels
channels = [3]
for i, weight in enumerate(self.width_attentions):
if mode == 'genotype':
with torch.no_grad():
probe = nn.functional.softmax(weight, dim=0)
C = self.Ranges[i][ torch.argmax(probe).item() ]
else:
raise ValueError('invalid mode : {:}'.format(mode))
channels.append( C )
# select depth
if mode == 'genotype':
with torch.no_grad():
depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
choices = torch.argmax(depth_probs, dim=1).cpu().tolist()
else:
raise ValueError('invalid mode : {:}'.format(mode))
selected_layers = []
for choice, xvalue in zip(choices, self.depth_info_list):
xtemp = xvalue[1]['choices'][choice] - xvalue[1]['xstart'] + 1
selected_layers.append(xtemp)
flop = 0
for i, layer in enumerate(self.layers):
s, e = self.layer2indexRange[i]
xchl = tuple( channels[s:e+1] )
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
if xatti <= choices[xstagei]: # leave this depth
flop+= layer.get_flops(xchl)
else:
flop+= 0 # do not use this layer
else:
flop+= layer.get_flops(xchl)
# the last fc layer
flop += channels[-1] * self.classifier.out_features
if config_dict is None:
return flop / 1e6
else:
config_dict['xchannels'] = channels
config_dict['xblocks'] = selected_layers
config_dict['super_type'] = 'infer-shape'
config_dict['estimated_FLOP'] = flop / 1e6
return flop / 1e6, config_dict
def get_arch_info(self):
string = "for depth and width, there are {:} + {:} attention probabilities.".format(len(self.depth_attentions), len(self.width_attentions))
string+= '\n{:}'.format(self.depth_info)
discrepancy = []
with torch.no_grad():
for i, att in enumerate(self.depth_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.depth_attentions), ' '.join(prob))
logt = ['{:.4f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:17s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || discrepancy={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
string += '\n-----------------------------------------------'
for i, att in enumerate(self.width_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.width_attentions), ' '.join(prob))
logt = ['{:.3f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:52s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || dis={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
return string, discrepancy
def set_tau(self, tau_max, tau_min, epoch_ratio):
assert epoch_ratio >= 0 and epoch_ratio <= 1, 'invalid epoch-ratio : {:}'.format(epoch_ratio)
tau = tau_min + (tau_max-tau_min) * (1 + math.cos(math.pi * epoch_ratio)) / 2
self.tau = tau
def get_message(self):
return self.message
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, inputs):
flop_width_probs = nn.functional.softmax(self.width_attentions, dim=1)
flop_depth_probs = nn.functional.softmax(self.depth_attentions, dim=1)
flop_depth_probs = torch.flip( torch.cumsum( torch.flip(flop_depth_probs, [1]), 1 ), [1] )
selected_widths, selected_width_probs = select2withP(self.width_attentions, self.tau)
selected_depth_probs = select2withP(self.depth_attentions, self.tau, True)
with torch.no_grad():
selected_widths = selected_widths.cpu()
x, last_channel_idx, expected_inC, flops = inputs, 0, 3, []
feature_maps = []
for i, layer in enumerate(self.layers):
selected_w_index = selected_widths [last_channel_idx: last_channel_idx+layer.num_conv]
selected_w_probs = selected_width_probs[last_channel_idx: last_channel_idx+layer.num_conv]
layer_prob = flop_width_probs [last_channel_idx: last_channel_idx+layer.num_conv]
x, expected_inC, expected_flop = layer( (x, expected_inC, layer_prob, selected_w_index, selected_w_probs) )
feature_maps.append( x )
last_channel_idx += layer.num_conv
if i in self.depth_info: # aggregate the information
choices = self.depth_info[i]['choices']
xstagei = self.depth_info[i]['stage']
#print ('iL={:}, choices={:}, stage={:}, probs={:}'.format(i, choices, xstagei, selected_depth_probs[xstagei].cpu().tolist()))
#for A, W in zip(choices, selected_depth_probs[xstagei]):
# print('Size = {:}, W = {:}'.format(feature_maps[A].size(), W))
possible_tensors = []
max_C = max( feature_maps[A].size(1) for A in choices )
for tempi, A in enumerate(choices):
xtensor = ChannelWiseInter(feature_maps[A], max_C)
possible_tensors.append( xtensor )
weighted_sum = sum( xtensor * W for xtensor, W in zip(possible_tensors, selected_depth_probs[xstagei]) )
x = weighted_sum
if i in self.depth_at_i:
xstagei, xatti = self.depth_at_i[i]
x_expected_flop = flop_depth_probs[xstagei, xatti] * expected_flop
else:
x_expected_flop = expected_flop
flops.append( x_expected_flop )
flops.append( expected_inC * (self.classifier.out_features*1.0/1e6) )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = linear_forward(features, self.classifier)
return logits, torch.stack( [sum(flops)] )
def basic_forward(self, inputs):
if self.InShape is None: self.InShape = (inputs.size(-2), inputs.size(-1))
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits

View File

@@ -0,0 +1,316 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
from ..initialization import initialize_resnet
from ..SharedUtils import additive_func
from .SoftSelect import select2withP, ChannelWiseInter
from .SoftSelect import linear_forward
from .SoftSelect import get_width_choices as get_choices
def conv_forward(inputs, conv, choices):
iC = conv.in_channels
fill_size = list(inputs.size())
fill_size[1] = iC - fill_size[1]
filled = torch.zeros(fill_size, device=inputs.device)
xinputs = torch.cat((inputs, filled), dim=1)
outputs = conv(xinputs)
selecteds = [outputs[:,:oC] for oC in choices]
return selecteds
class ConvBNReLU(nn.Module):
num_conv = 1
def __init__(self, nIn, nOut, kernel, stride, padding, bias, has_avg, has_bn, has_relu):
super(ConvBNReLU, self).__init__()
self.InShape = None
self.OutShape = None
self.choices = get_choices(nOut)
self.register_buffer('choices_tensor', torch.Tensor( self.choices ))
if has_avg : self.avg = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else : self.avg = None
self.conv = nn.Conv2d(nIn, nOut, kernel_size=kernel, stride=stride, padding=padding, dilation=1, groups=1, bias=bias)
#if has_bn : self.bn = nn.BatchNorm2d(nOut)
#else : self.bn = None
self.has_bn = has_bn
self.BNs = nn.ModuleList()
for i, _out in enumerate(self.choices):
self.BNs.append(nn.BatchNorm2d(_out))
if has_relu: self.relu = nn.ReLU(inplace=True)
else : self.relu = None
self.in_dim = nIn
self.out_dim = nOut
self.search_mode = 'basic'
def get_flops(self, channels, check_range=True, divide=1):
iC, oC = channels
if check_range: assert iC <= self.conv.in_channels and oC <= self.conv.out_channels, '{:} vs {:} | {:} vs {:}'.format(iC, self.conv.in_channels, oC, self.conv.out_channels)
assert isinstance(self.InShape, tuple) and len(self.InShape) == 2, 'invalid in-shape : {:}'.format(self.InShape)
assert isinstance(self.OutShape, tuple) and len(self.OutShape) == 2, 'invalid out-shape : {:}'.format(self.OutShape)
#conv_per_position_flops = self.conv.kernel_size[0] * self.conv.kernel_size[1] * iC * oC / self.conv.groups
conv_per_position_flops = (self.conv.kernel_size[0] * self.conv.kernel_size[1] * 1.0 / self.conv.groups)
all_positions = self.OutShape[0] * self.OutShape[1]
flops = (conv_per_position_flops * all_positions / divide) * iC * oC
if self.conv.bias is not None: flops += all_positions / divide
return flops
def get_range(self):
return [self.choices]
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, index, prob = tuple_inputs
index, prob = torch.squeeze(index).tolist(), torch.squeeze(prob)
probability = torch.squeeze(probability)
assert len(index) == 2, 'invalid length : {:}'.format(index)
# compute expected flop
#coordinates = torch.arange(self.x_range[0], self.x_range[1]+1).type_as(probability)
expected_outC = (self.choices_tensor * probability).sum()
expected_flop = self.get_flops([expected_inC, expected_outC], False, 1e6)
if self.avg : out = self.avg( inputs )
else : out = inputs
# convolutional layer
out_convs = conv_forward(out, self.conv, [self.choices[i] for i in index])
out_bns = [self.BNs[idx](out_conv) for idx, out_conv in zip(index, out_convs)]
# merge
out_channel = max([x.size(1) for x in out_bns])
outA = ChannelWiseInter(out_bns[0], out_channel)
outB = ChannelWiseInter(out_bns[1], out_channel)
out = outA * prob[0] + outB * prob[1]
#out = additive_func(out_bns[0]*prob[0], out_bns[1]*prob[1])
if self.relu: out = self.relu( out )
else : out = out
return out, expected_outC, expected_flop
def basic_forward(self, inputs):
if self.avg : out = self.avg( inputs )
else : out = inputs
conv = self.conv( out )
if self.has_bn:out= self.BNs[-1]( conv )
else : out = conv
if self.relu: out = self.relu( out )
else : out = out
if self.InShape is None:
self.InShape = (inputs.size(-2), inputs.size(-1))
self.OutShape = (out.size(-2) , out.size(-1))
return out
class SimBlock(nn.Module):
expansion = 1
num_conv = 1
def __init__(self, inplanes, planes, stride):
super(SimBlock, self).__init__()
assert stride == 1 or stride == 2, 'invalid stride {:}'.format(stride)
self.conv = ConvBNReLU(inplanes, planes, 3, stride, 1, False, has_avg=False, has_bn=True, has_relu=True)
if stride == 2:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=True, has_bn=False, has_relu=False)
elif inplanes != planes:
self.downsample = ConvBNReLU(inplanes, planes, 1, 1, 0, False, has_avg=False,has_bn=True , has_relu=False)
else:
self.downsample = None
self.out_dim = planes
self.search_mode = 'basic'
def get_range(self):
return self.conv.get_range()
def get_flops(self, channels):
assert len(channels) == 2, 'invalid channels : {:}'.format(channels)
flop_A = self.conv.get_flops([channels[0], channels[1]])
if hasattr(self.downsample, 'get_flops'):
flop_C = self.downsample.get_flops([channels[0], channels[-1]])
else:
flop_C = 0
if channels[0] != channels[-1] and self.downsample is None: # this short-cut will be added during the infer-train
flop_C = channels[0] * channels[-1] * self.conv.OutShape[0] * self.conv.OutShape[1]
return flop_A + flop_C
def forward(self, inputs):
if self.search_mode == 'basic' : return self.basic_forward(inputs)
elif self.search_mode == 'search': return self.search_forward(inputs)
else: raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, tuple_inputs):
assert isinstance(tuple_inputs, tuple) and len(tuple_inputs) == 5, 'invalid type input : {:}'.format( type(tuple_inputs) )
inputs, expected_inC, probability, indexes, probs = tuple_inputs
assert indexes.size(0) == 1 and probs.size(0) == 1 and probability.size(0) == 1, 'invalid size : {:}, {:}, {:}'.format(indexes.size(), probs.size(), probability.size())
out, expected_next_inC, expected_flop = self.conv( (inputs, expected_inC , probability[0], indexes[0], probs[0]) )
if self.downsample is not None:
residual, _, expected_flop_c = self.downsample( (inputs, expected_inC , probability[-1], indexes[-1], probs[-1]) )
else:
residual, expected_flop_c = inputs, 0
out = additive_func(residual, out)
return nn.functional.relu(out, inplace=True), expected_next_inC, sum([expected_flop, expected_flop_c])
def basic_forward(self, inputs):
basicblock = self.conv(inputs)
if self.downsample is not None: residual = self.downsample(inputs)
else : residual = inputs
out = additive_func(residual, basicblock)
return nn.functional.relu(out, inplace=True)
class SearchWidthSimResNet(nn.Module):
def __init__(self, depth, num_classes):
super(SearchWidthSimResNet, self).__init__()
assert (depth - 2) % 3 == 0, 'depth should be one of 5, 8, 11, 14, ... instead of {:}'.format(depth)
layer_blocks = (depth - 2) // 3
self.message = 'SearchWidthSimResNet : Depth : {:} , Layers for each block : {:}'.format(depth, layer_blocks)
self.num_classes = num_classes
self.channels = [16]
self.layers = nn.ModuleList( [ ConvBNReLU(3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True) ] )
self.InShape = None
for stage in range(3):
for iL in range(layer_blocks):
iC = self.channels[-1]
planes = 16 * (2**stage)
stride = 2 if stage > 0 and iL == 0 else 1
module = SimBlock(iC, planes, stride)
self.channels.append( module.out_dim )
self.layers.append ( module )
self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(stage, iL, layer_blocks, len(self.layers)-1, iC, module.out_dim, stride)
self.avgpool = nn.AvgPool2d(8)
self.classifier = nn.Linear(module.out_dim, num_classes)
self.InShape = None
self.tau = -1
self.search_mode = 'basic'
#assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)
# parameters for width
self.Ranges = []
self.layer2indexRange = []
for i, layer in enumerate(self.layers):
start_index = len(self.Ranges)
self.Ranges += layer.get_range()
self.layer2indexRange.append( (start_index, len(self.Ranges)) )
assert len(self.Ranges) + 1 == depth, 'invalid depth check {:} vs {:}'.format(len(self.Ranges) + 1, depth)
self.register_parameter('width_attentions', nn.Parameter(torch.Tensor(len(self.Ranges), get_choices(None))))
nn.init.normal_(self.width_attentions, 0, 0.01)
self.apply(initialize_resnet)
def arch_parameters(self):
return [self.width_attentions]
def base_parameters(self):
return list(self.layers.parameters()) + list(self.avgpool.parameters()) + list(self.classifier.parameters())
def get_flop(self, mode, config_dict, extra_info):
if config_dict is not None: config_dict = config_dict.copy()
#weights = [F.softmax(x, dim=0) for x in self.width_attentions]
channels = [3]
for i, weight in enumerate(self.width_attentions):
if mode == 'genotype':
with torch.no_grad():
probe = nn.functional.softmax(weight, dim=0)
C = self.Ranges[i][ torch.argmax(probe).item() ]
elif mode == 'max':
C = self.Ranges[i][-1]
elif mode == 'fix':
C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
elif mode == 'random':
assert isinstance(extra_info, float), 'invalid extra_info : {:}'.format(extra_info)
with torch.no_grad():
prob = nn.functional.softmax(weight, dim=0)
approximate_C = int( math.sqrt( extra_info ) * self.Ranges[i][-1] )
for j in range(prob.size(0)):
prob[j] = 1 / (abs(j - (approximate_C-self.Ranges[i][j])) + 0.2)
C = self.Ranges[i][ torch.multinomial(prob, 1, False).item() ]
else:
raise ValueError('invalid mode : {:}'.format(mode))
channels.append( C )
flop = 0
for i, layer in enumerate(self.layers):
s, e = self.layer2indexRange[i]
xchl = tuple( channels[s:e+1] )
flop+= layer.get_flops(xchl)
# the last fc layer
flop += channels[-1] * self.classifier.out_features
if config_dict is None:
return flop / 1e6
else:
config_dict['xchannels'] = channels
config_dict['super_type'] = 'infer-width'
config_dict['estimated_FLOP'] = flop / 1e6
return flop / 1e6, config_dict
def get_arch_info(self):
string = "for width, there are {:} attention probabilities.".format(len(self.width_attentions))
discrepancy = []
with torch.no_grad():
for i, att in enumerate(self.width_attentions):
prob = nn.functional.softmax(att, dim=0)
prob = prob.cpu() ; selc = prob.argmax().item() ; prob = prob.tolist()
prob = ['{:.3f}'.format(x) for x in prob]
xstring = '{:03d}/{:03d}-th : {:}'.format(i, len(self.width_attentions), ' '.join(prob))
logt = ['{:.3f}'.format(x) for x in att.cpu().tolist()]
xstring += ' || {:52s}'.format(' '.join(logt))
prob = sorted( [float(x) for x in prob] )
disc = prob[-1] - prob[-2]
xstring += ' || dis={:.2f} || select={:}/{:}'.format(disc, selc, len(prob))
discrepancy.append( disc )
string += '\n{:}'.format(xstring)
return string, discrepancy
def set_tau(self, tau_max, tau_min, epoch_ratio):
assert epoch_ratio >= 0 and epoch_ratio <= 1, 'invalid epoch-ratio : {:}'.format(epoch_ratio)
tau = tau_min + (tau_max-tau_min) * (1 + math.cos(math.pi * epoch_ratio)) / 2
self.tau = tau
def get_message(self):
return self.message
def forward(self, inputs):
if self.search_mode == 'basic':
return self.basic_forward(inputs)
elif self.search_mode == 'search':
return self.search_forward(inputs)
else:
raise ValueError('invalid search_mode = {:}'.format(self.search_mode))
def search_forward(self, inputs):
flop_probs = nn.functional.softmax(self.width_attentions, dim=1)
selected_widths, selected_probs = select2withP(self.width_attentions, self.tau)
with torch.no_grad():
selected_widths = selected_widths.cpu()
x, last_channel_idx, expected_inC, flops = inputs, 0, 3, []
for i, layer in enumerate(self.layers):
selected_w_index = selected_widths[last_channel_idx: last_channel_idx+layer.num_conv]
selected_w_probs = selected_probs[last_channel_idx: last_channel_idx+layer.num_conv]
layer_prob = flop_probs[last_channel_idx: last_channel_idx+layer.num_conv]
x, expected_inC, expected_flop = layer( (x, expected_inC, layer_prob, selected_w_index, selected_w_probs) )
last_channel_idx += layer.num_conv
flops.append( expected_flop )
flops.append( expected_inC * (self.classifier.out_features*1.0/1e6) )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = linear_forward(features, self.classifier)
return logits, torch.stack( [sum(flops)] )
def basic_forward(self, inputs):
if self.InShape is None: self.InShape = (inputs.size(-2), inputs.size(-1))
x = inputs
for i, layer in enumerate(self.layers):
x = layer( x )
features = self.avgpool(x)
features = features.view(features.size(0), -1)
logits = self.classifier(features)
return features, logits

View File

@@ -0,0 +1,111 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import math, torch
import torch.nn as nn
def select2withP(logits, tau, just_prob=False, num=2, eps=1e-7):
if tau <= 0:
new_logits = logits
probs = nn.functional.softmax(new_logits, dim=1)
else :
while True: # a trick to avoid the gumbels bug
gumbels = -torch.empty_like(logits).exponential_().log()
new_logits = (logits.log_softmax(dim=1) + gumbels) / tau
probs = nn.functional.softmax(new_logits, dim=1)
if (not torch.isinf(gumbels).any()) and (not torch.isinf(probs).any()) and (not torch.isnan(probs).any()): break
if just_prob: return probs
#with torch.no_grad(): # add eps for unexpected torch error
# probs = nn.functional.softmax(new_logits, dim=1)
# selected_index = torch.multinomial(probs + eps, 2, False)
with torch.no_grad(): # add eps for unexpected torch error
probs = probs.cpu()
selected_index = torch.multinomial(probs + eps, num, False).to(logits.device)
selected_logit = torch.gather(new_logits, 1, selected_index)
selected_probs = nn.functional.softmax(selected_logit, dim=1)
return selected_index, selected_probs
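# Illustrative usage (not in the original file): for architecture logits of
# shape (L, C) -- L searchable layers, C width choices per layer --
#   index, prob = select2withP(logits, tau=5.0)
# samples two distinct choices per row via the Gumbel-softmax trick above,
# returning `index` of shape (L, 2) and `prob` of shape (L, 2), a softmax over
# the two selected (temperature-scaled) logits; with just_prob=True only the
# full (L, C) probability matrix is returned.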
def ChannelWiseInter(inputs, oC, mode='v2'):
if mode == 'v1':
return ChannelWiseInterV1(inputs, oC)
elif mode == 'v2':
return ChannelWiseInterV2(inputs, oC)
else:
raise ValueError('invalid mode : {:}'.format(mode))
def ChannelWiseInterV1(inputs, oC):
assert inputs.dim() == 4, 'invalid dimension : {:}'.format(inputs.size())
def start_index(a, b, c):
return int( math.floor(float(a * c) / b) )
def end_index(a, b, c):
return int( math.ceil(float((a + 1) * c) / b) )
batch, iC, H, W = inputs.size()
outputs = torch.zeros((batch, oC, H, W), dtype=inputs.dtype, device=inputs.device)
if iC == oC: return inputs
for ot in range(oC):
istartT, iendT = start_index(ot, oC, iC), end_index(ot, oC, iC)
values = inputs[:, istartT:iendT].mean(dim=1)
outputs[:, ot, :, :] = values
return outputs
def ChannelWiseInterV2(inputs, oC):
assert inputs.dim() == 4, 'invalid dimension : {:}'.format(inputs.size())
batch, C, H, W = inputs.size()
if C == oC: return inputs
else : return nn.functional.adaptive_avg_pool3d(inputs, (oC,H,W))
#inputs_5D = inputs.view(batch, 1, C, H, W)
#otputs_5D = nn.functional.interpolate(inputs_5D, (oC,H,W), None, 'area', None)
#otputs = otputs_5D.view(batch, oC, H, W)
#otputs_5D = nn.functional.interpolate(inputs_5D, (oC,H,W), None, 'trilinear', False)
#return otputs
def linear_forward(inputs, linear):
if linear is None: return inputs
iC = inputs.size(1)
weight = linear.weight[:, :iC]
if linear.bias is None: bias = None
else : bias = linear.bias
return nn.functional.linear(inputs, weight, bias)
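# Illustrative note (not in the original file, hypothetical names): linear_forward
# slices the classifier weight to the current feature width, so a fixed
# nn.Linear can consume the variable-width features produced during search, e.g.
#   linear_forward(feat_48, fc_64)  # uses fc_64.weight[:, :48] plus the bias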
def get_width_choices(nOut):
xsrange = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
if nOut is None:
return len(xsrange)
else:
Xs = [int(nOut * i) for i in xsrange]
#xs = [ int(nOut * i // 10) for i in range(2, 11)]
#Xs = [x for i, x in enumerate(xs) if i+1 == len(xs) or xs[i+1] > x+1]
Xs = sorted( list( set(Xs) ) )
return tuple(Xs)
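# Illustrative example (not in the original file): the relative widths above
# translate into concrete channel candidates, e.g.
#   get_width_choices(64)   -> (19, 25, 32, 38, 44, 51, 57, 64)
#   get_width_choices(None) -> 8   # just the number of choices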
def get_depth_choices(nDepth):
if nDepth is None:
return 3
else:
assert nDepth >= 3, 'nDepth should be greater than 2 vs {:}'.format(nDepth)
if nDepth == 1 : return (1, 1, 1)
elif nDepth == 2: return (1, 1, 2)
elif nDepth >= 3:
return (nDepth//3, nDepth*2//3, nDepth)
else:
raise ValueError('invalid Depth : {:}'.format(nDepth))
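# Illustrative example (not in the original file): get_depth_choices(None)
# returns 3 (the number of depth options), while for a concrete stage depth
#   get_depth_choices(9) -> (3, 6, 9)
#   get_depth_choices(5) -> (1, 3, 5)
# note that the nDepth==1 and nDepth==2 branches are unreachable because of
# the preceding assert nDepth >= 3.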
def drop_path(x, drop_prob):
if drop_prob > 0.:
keep_prob = 1. - drop_prob
mask = x.new_zeros(x.size(0), 1, 1, 1)
mask = mask.bernoulli_(keep_prob)
x = x * (mask / keep_prob)
#x.div_(keep_prob)
#x.mul_(mask)
return x
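# Illustrative note (not in the original file): drop_path zeroes whole samples
# of the batch with probability drop_prob and rescales the survivors by
# 1/keep_prob so the expected activation is unchanged, e.g. with drop_prob=0.2
# roughly one in five samples is dropped and the rest are multiplied by 1.25.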

View File

@@ -0,0 +1,8 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
from .SearchCifarResNet_width import SearchWidthCifarResNet
from .SearchCifarResNet_depth import SearchDepthCifarResNet
from .SearchCifarResNet import SearchShapeCifarResNet
from .SearchSimResNet_width import SearchWidthSimResNet
from .SearchImagenetResNet import SearchShapeImagenetResNet

View File

@@ -0,0 +1,20 @@
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import torch
import torch.nn as nn
from SoftSelect import ChannelWiseInter
if __name__ == '__main__':
tensors = torch.rand((16, 128, 7, 7))
for oc in range(200, 210):
out_v1 = ChannelWiseInter(tensors, oc, 'v1')
out_v2 = ChannelWiseInter(tensors, oc, 'v2')
assert (out_v1 == out_v2).any().item() == 1
for oc in range(48, 160):
out_v1 = ChannelWiseInter(tensors, oc, 'v1')
out_v2 = ChannelWiseInter(tensors, oc, 'v2')
assert (out_v1 == out_v2).any().item() == 1

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,65 @@
"""Base operations used by the modules in this search space."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBnRelu(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
super(ConvBnRelu, self).__init__()
self.conv_bn_relu = nn.Sequential(
#nn.ReLU(),
nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
nn.BatchNorm2d(out_channels),
#nn.ReLU(inplace=True)
nn.ReLU()
)
def forward(self, x):
return self.conv_bn_relu(x)
class Conv3x3BnRelu(nn.Module):
"""3x3 convolution with batch norm and ReLU activation."""
def __init__(self, in_channels, out_channels):
super(Conv3x3BnRelu, self).__init__()
self.conv3x3 = ConvBnRelu(in_channels, out_channels, 3, 1, 1)
def forward(self, x):
x = self.conv3x3(x)
return x
class Conv1x1BnRelu(nn.Module):
"""1x1 convolution with batch norm and ReLU activation."""
def __init__(self, in_channels, out_channels):
super(Conv1x1BnRelu, self).__init__()
self.conv1x1 = ConvBnRelu(in_channels, out_channels, 1, 1, 0)
def forward(self, x):
x = self.conv1x1(x)
return x
class MaxPool3x3(nn.Module):
"""3x3 max pool with no subsampling."""
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
super(MaxPool3x3, self).__init__()
self.maxpool = nn.MaxPool2d(kernel_size, stride, padding)
#self.maxpool = nn.AvgPool2d(kernel_size, stride, padding)
def forward(self, x):
x = self.maxpool(x)
return x
# Commas should not be used in op names
OP_MAP = {
'conv3x3-bn-relu': Conv3x3BnRelu,
'conv1x1-bn-relu': Conv1x1BnRelu,
'maxpool3x3': MaxPool3x3
}
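# Illustrative usage (not in the original file): OP_MAP turns the op label
# strings stored in a cell spec into module constructors, e.g.
#   op = OP_MAP['conv3x3-bn-relu'](in_channels, out_channels)
# builds a Conv3x3BnRelu block whose forward pass is conv -> BN -> ReLU.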

View File

@@ -0,0 +1,167 @@
# Copyright 2019 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions used by generate_graph.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import itertools
import numpy as np
def gen_is_edge_fn(bits):
"""Generate a boolean function for the edge connectivity.
Given a bitstring FEDCBA and a 4x4 matrix, the generated matrix is
[[0, A, B, D],
[0, 0, C, E],
[0, 0, 0, F],
[0, 0, 0, 0]]
Note that this function is agnostic to the actual matrix dimension due to the
order in which elements are filled out (column-major, starting from least
significant bit). For example, the same FEDCBA bitstring (0-padded) on a 5x5
matrix is
[[0, A, B, D, 0],
[0, 0, C, E, 0],
[0, 0, 0, F, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]]
Args:
bits: integer which will be interpreted as a bit mask.
Returns:
vectorized function that returns True when an edge is present.
"""
def is_edge(x, y):
"""Is there an edge from x to y (0-indexed)?"""
if x >= y:
return 0
# Map x, y to index into bit string
index = x + (y * (y - 1) // 2)
return (bits >> index) % 2 == 1
return np.vectorize(is_edge)
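# Illustrative usage (not in the original file): the vectorized predicate is
# typically evaluated over a square index grid, e.g.
#   matrix = np.fromfunction(gen_is_edge_fn(0b000011), (4, 4), dtype=np.int8)
# which sets only bits A and B, i.e. edges 0->1 and 0->2 of a 4-vertex DAG.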
def is_full_dag(matrix):
"""Full DAG == all vertices on a path from vert 0 to (V-1).
i.e. no disconnected or "hanging" vertices.
It is sufficient to check for:
1) no rows of 0 except for row V-1 (only output vertex has no out-edges)
2) no cols of 0 except for col 0 (only input vertex has no in-edges)
Args:
matrix: V x V upper-triangular adjacency matrix
Returns:
True if there are no dangling vertices.
"""
shape = np.shape(matrix)
rows = matrix[:shape[0]-1, :] == 0
rows = np.all(rows, axis=1) # Any row with all 0 will be True
rows_bad = np.any(rows)
cols = matrix[:, 1:] == 0
cols = np.all(cols, axis=0) # Any col with all 0 will be True
cols_bad = np.any(cols)
return (not rows_bad) and (not cols_bad)
def num_edges(matrix):
"""Computes number of edges in adjacency matrix."""
return np.sum(matrix)
def hash_module(matrix, labeling):
"""Computes a graph-invariance MD5 hash of the matrix and label pair.
Args:
matrix: np.ndarray square upper-triangular adjacency matrix.
labeling: list of int labels of length equal to both dimensions of
matrix.
Returns:
MD5 hash of the matrix and labeling.
"""
vertices = np.shape(matrix)[0]
in_edges = np.sum(matrix, axis=0).tolist()
out_edges = np.sum(matrix, axis=1).tolist()
assert len(in_edges) == len(out_edges) == len(labeling)
hashes = list(zip(out_edges, in_edges, labeling))
hashes = [hashlib.md5(str(h).encode('utf-8')).hexdigest() for h in hashes]
# Computing this up to the diameter is probably sufficient but since the
# operation is fast, it is okay to repeat more times.
for _ in range(vertices):
new_hashes = []
for v in range(vertices):
in_neighbors = [hashes[w] for w in range(vertices) if matrix[w, v]]
out_neighbors = [hashes[w] for w in range(vertices) if matrix[v, w]]
new_hashes.append(hashlib.md5(
(''.join(sorted(in_neighbors)) + '|' +
''.join(sorted(out_neighbors)) + '|' +
hashes[v]).encode('utf-8')).hexdigest())
hashes = new_hashes
fingerprint = hashlib.md5(str(sorted(hashes)).encode('utf-8')).hexdigest()
return fingerprint
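# Illustrative note (not in the original file): each vertex hash starts from
# its (out-degree, in-degree, label) triple and is then iteratively mixed with
# the sorted hashes of its neighbours, and the final fingerprint sorts the
# per-vertex hashes, so relabelling interior vertices (with ops permuted
# accordingly) yields the same fingerprint; this is what allows isomorphic
# cells to be de-duplicated during graph enumeration.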
def permute_graph(graph, label, permutation):
"""Permutes the graph and labels based on permutation.
Args:
graph: np.ndarray adjacency matrix.
label: list of labels of same length as graph dimensions.
permutation: a permutation list of ints of same length as graph dimensions.
Returns:
np.ndarray where vertex permutation[v] is vertex v from the original graph
"""
# vertex permutation[v] in new graph is vertex v in the old graph
forward_perm = zip(permutation, list(range(len(permutation))))
inverse_perm = [x[1] for x in sorted(forward_perm)]
edge_fn = lambda x, y: graph[inverse_perm[x], inverse_perm[y]] == 1
new_matrix = np.fromfunction(np.vectorize(edge_fn),
(len(label), len(label)),
dtype=np.int8)
new_label = [label[inverse_perm[i]] for i in range(len(label))]
return new_matrix, new_label
def is_isomorphic(graph1, graph2):
"""Exhaustively checks if 2 graphs are isomorphic."""
matrix1, label1 = np.array(graph1[0]), graph1[1]
matrix2, label2 = np.array(graph2[0]), graph2[1]
assert np.shape(matrix1) == np.shape(matrix2)
assert len(label1) == len(label2)
vertices = np.shape(matrix1)[0]
# Note: input and output in our constrained graphs always map to themselves
# but this script does not enforce that.
for perm in itertools.permutations(range(0, vertices)):
pmatrix1, plabel1 = permute_graph(matrix1, label1, perm)
if np.array_equal(pmatrix1, matrix2) and plabel1 == label2:
return True
return False
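# Illustrative usage (not in the original file): two specs that differ only by
# an interior-vertex relabelling compare equal, e.g.
#   g1 = ([[0, 1, 1, 0],
#          [0, 0, 0, 1],
#          [0, 0, 0, 1],
#          [0, 0, 0, 0]], [-1, 1, 2, -2])
#   g2 = ([[0, 1, 1, 0],
#          [0, 0, 0, 1],
#          [0, 0, 0, 1],
#          [0, 0, 0, 0]], [-1, 2, 1, -2])
#   is_isomorphic(g1, g2)  # True: swapping the two interior vertices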

View File

@@ -0,0 +1,252 @@
"""Builds the Pytorch computational graph.
Tensors flowing into a single vertex are added together for all vertices
except the output, which is concatenated instead. Tensors flowing out of input
are always added.
If interior edge channels don't match, drop the extra channels (channels are
guaranteed non-decreasing). Tensors flowing out of the input are always
projected instead.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
from .base_ops import *
import torch
import torch.nn as nn
import torch.nn.functional as F
class Network(nn.Module):
def __init__(self, spec, args, searchspace=[]):
super(Network, self).__init__()
self.spec = spec   # keep the spec so that genotype() below can report it
self.layers = nn.ModuleList([])
in_channels = 3
out_channels = args.stem_out_channels
# initial stem convolution
stem_conv = ConvBnRelu(in_channels, out_channels, 3, 1, 1)
self.layers.append(stem_conv)
in_channels = out_channels
for stack_num in range(args.num_stacks):
if stack_num > 0:
#downsample = nn.MaxPool2d(kernel_size=3, stride=2)
downsample = nn.MaxPool2d(kernel_size=2, stride=2)
#downsample = nn.AvgPool2d(kernel_size=2, stride=2)
#downsample = nn.Conv2d(in_channels, out_channels, kernel_size=(2, 2), stride=2)
self.layers.append(downsample)
out_channels *= 2
for module_num in range(args.num_modules_per_stack):
cell = Cell(spec, in_channels, out_channels)
self.layers.append(cell)
in_channels = out_channels
self.classifier = nn.Linear(out_channels, args.num_labels)
# for DARTS search
num_edge = np.shape(spec.matrix)[0]
self.arch_parameters = nn.Parameter( 1e-3*torch.randn(num_edge, len(searchspace)))
self._initialize_weights()
def forward(self, x, get_ints=True):
ints = []
for _, layer in enumerate(self.layers):
x = layer(x)
ints.append(x)
out = torch.mean(x, (2, 3))
ints.append(out)
out = self.classifier(out)
if get_ints:
return out, ints[-1]
else:
return out
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2.0 / n))
if m.bias is not None:
m.bias.data.zero_()
pass
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
pass
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
pass
def get_weights(self):
xlist = []
for m in self.modules():
xlist.append(m.parameters())
return xlist
def get_alphas(self):
return [self.arch_parameters]
def genotype(self):
return str(self.spec)
class Cell(nn.Module):
"""
Builds the model using the adjacency matrix and op labels specified. Channels
controls the module output channel count but the interior channels are
determined via equally splitting the channel count whenever there is a
concatenation of Tensors.
"""
def __init__(self, spec, in_channels, out_channels):
super(Cell, self).__init__()
self.spec = spec
self.num_vertices = np.shape(self.spec.matrix)[0]
# vertex_channels[i] = number of output channels of vertex i
self.vertex_channels = ComputeVertexChannels(in_channels, out_channels, self.spec.matrix)
#self.vertex_channels = [in_channels] + [out_channels] * (self.num_vertices - 1)
# operation for each node
self.vertex_op = nn.ModuleList([None])
for t in range(1, self.num_vertices-1):
op = OP_MAP[spec.ops[t]](self.vertex_channels[t], self.vertex_channels[t])
self.vertex_op.append(op)
# operation for input on each vertex
self.input_op = nn.ModuleList([None])
for t in range(1, self.num_vertices):
if self.spec.matrix[0, t]:
self.input_op.append(Projection(in_channels, self.vertex_channels[t]))
else:
self.input_op.append(None)
def forward(self, x):
tensors = [x]
out_concat = []
for t in range(1, self.num_vertices-1):
fan_in = [Truncate(tensors[src], self.vertex_channels[t]) for src in range(1, t) if self.spec.matrix[src, t]]
fan_in_inds = [src for src in range(1, t) if self.spec.matrix[src, t]]
if self.spec.matrix[0, t]:
fan_in.append(self.input_op[t](x))
fan_in_inds = [0] + fan_in_inds
# perform operation on node
#vertex_input = torch.stack(fan_in, dim=0).sum(dim=0)
vertex_input = sum(fan_in)
#vertex_input = sum(fan_in) / len(fan_in)
vertex_output = self.vertex_op[t](vertex_input)
tensors.append(vertex_output)
if self.spec.matrix[t, self.num_vertices-1]:
out_concat.append(tensors[t])
if not out_concat: # empty list
assert self.spec.matrix[0, self.num_vertices-1]
outputs = self.input_op[self.num_vertices-1](tensors[0])
else:
if len(out_concat) == 1:
outputs = out_concat[0]
else:
outputs = torch.cat(out_concat, 1)
if self.spec.matrix[0, self.num_vertices-1]:
outputs += self.input_op[self.num_vertices-1](tensors[0])
#if self.spec.matrix[0, self.num_vertices-1]:
# out_concat.append(self.input_op[self.num_vertices-1](tensors[0]))
#outputs = sum(out_concat) / len(out_concat)
return outputs
def Projection(in_channels, out_channels):
"""1x1 projection (as in ResNet) followed by batch normalization and ReLU."""
return ConvBnRelu(in_channels, out_channels, 1)
def Truncate(inputs, channels):
"""Slice the inputs to channels if necessary."""
input_channels = inputs.size()[1]
if input_channels < channels:
raise ValueError('input channel < output channels for truncate')
elif input_channels == channels:
return inputs # No truncation necessary
else:
# Truncation should only be necessary when channel division leads to
# vertices with +1 channels. The input vertex should always be projected to
# the minimum channel count.
assert input_channels - channels == 1
return inputs[:, :channels, :, :]
def ComputeVertexChannels(in_channels, out_channels, matrix):
"""Computes the number of channels at every vertex.
Given the input channels and output channels, this calculates the number of
channels at each interior vertex. Interior vertices have the same number of
channels as the max of the channels of the vertices they feed into. The output
channels are divided amongst the vertices that are directly connected to the output.
When the division is not even, some vertices may receive an extra channel to
compensate.
Returns:
list of channel counts, in order of the vertices.
"""
num_vertices = np.shape(matrix)[0]
vertex_channels = [0] * num_vertices
vertex_channels[0] = in_channels
vertex_channels[num_vertices - 1] = out_channels
if num_vertices == 2:
# Edge case where module only has input and output vertices
return vertex_channels
# Compute the in-degree ignoring input, axis 0 is the src vertex and axis 1 is
# the dst vertex. Summing over 0 gives the in-degree count of each vertex.
in_degree = np.sum(matrix[1:], axis=0)
interior_channels = out_channels // in_degree[num_vertices - 1]
correction = out_channels % in_degree[num_vertices - 1] # Remainder to add
# Set channels of vertices that flow directly to output
for v in range(1, num_vertices - 1):
if matrix[v, num_vertices - 1]:
vertex_channels[v] = interior_channels
if correction:
vertex_channels[v] += 1
correction -= 1
# Set channels for all other vertices to the max of the out edges, going
# backwards. (num_vertices - 2) index skipped because it only connects to
# output.
for v in range(num_vertices - 3, 0, -1):
if not matrix[v, num_vertices - 1]:
for dst in range(v + 1, num_vertices - 1):
if matrix[v, dst]:
vertex_channels[v] = max(vertex_channels[v], vertex_channels[dst])
assert vertex_channels[v] > 0
# Sanity check, verify that channels never increase and final channels add up.
final_fan_in = 0
for v in range(1, num_vertices - 1):
if matrix[v, num_vertices - 1]:
final_fan_in += vertex_channels[v]
for dst in range(v + 1, num_vertices - 1):
if matrix[v, dst]:
assert vertex_channels[v] >= vertex_channels[dst]
assert final_fan_in == out_channels or num_vertices == 2
# num_vertices == 2 means only input/output nodes, so 0 fan-in
return vertex_channels
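# Illustrative sketch (not part of the original file): the channel split for a
# 5-vertex cell in which three interior vertices feed the output. With 32 output
# channels and three incoming edges, two vertices receive one extra channel
# (32 = 11 + 11 + 10). The matrix below is made up for demonstration only.
if __name__ == "__main__":
    demo_matrix = np.array([[0, 1, 1, 1, 0],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 0]])
    print(ComputeVertexChannels(16, 32, demo_matrix))  # [16, 11, 11, 10, 32]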

View File

@@ -0,0 +1,152 @@
"""Model specification for module connectivity individuals.
This module handles pruning the unused parts of the computation graph but should
avoid creating any PyTorch models (this is done inside model_builder.py).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import numpy as np
from . import graph_util
# Graphviz is optional and only required for visualization.
try:
import graphviz # pylint: disable=g-import-not-at-top
except ImportError:
pass
class ModelSpec(object):
"""Model specification given adjacency matrix and labeling."""
def __init__(self, matrix, ops, data_format='channels_last'):
"""Initialize the module spec.
Args:
matrix: ndarray or nested list with shape [V, V] for the adjacency matrix.
ops: V-length list of labels for the base ops used. The first and last
elements are ignored because they are the input and output vertices
which have no operations. The elements are retained to keep consistent
indexing.
data_format: channels_last or channels_first.
Raises:
ValueError: invalid matrix or ops
"""
if not isinstance(matrix, np.ndarray):
matrix = np.array(matrix)
shape = np.shape(matrix)
if len(shape) != 2 or shape[0] != shape[1]:
raise ValueError('matrix must be square')
if shape[0] != len(ops):
raise ValueError('length of ops must match matrix dimensions')
if not is_upper_triangular(matrix):
raise ValueError('matrix must be upper triangular')
# Both the original and pruned matrices are deep copies of the matrix and
# ops so any changes to those after initialization are not recognized by the
# spec.
self.original_matrix = copy.deepcopy(matrix)
self.original_ops = copy.deepcopy(ops)
self.matrix = copy.deepcopy(matrix)
self.ops = copy.deepcopy(ops)
self.valid_spec = True
self._prune()
self.data_format = data_format
def _prune(self):
"""Prune the extraneous parts of the graph.
General procedure:
1) Remove parts of graph not connected to input.
2) Remove parts of graph not connected to output.
3) Reorder the vertices so that they are consecutive after steps 1 and 2.
These 3 steps can be combined by deleting the rows and columns of the
vertices that are not reachable from both the input and output (in reverse).
"""
num_vertices = np.shape(self.original_matrix)[0]
# DFS forward from input
visited_from_input = set([0])
frontier = [0]
while frontier:
top = frontier.pop()
for v in range(top + 1, num_vertices):
if self.original_matrix[top, v] and v not in visited_from_input:
visited_from_input.add(v)
frontier.append(v)
# DFS backward from output
visited_from_output = set([num_vertices - 1])
frontier = [num_vertices - 1]
while frontier:
top = frontier.pop()
for v in range(0, top):
if self.original_matrix[v, top] and v not in visited_from_output:
visited_from_output.add(v)
frontier.append(v)
# Any vertex that isn't connected to both input and output is extraneous to
# the computation graph.
extraneous = set(range(num_vertices)).difference(
visited_from_input.intersection(visited_from_output))
# If the non-extraneous graph is less than 2 vertices, the input is not
# connected to the output and the spec is invalid.
if len(extraneous) > num_vertices - 2:
self.matrix = None
self.ops = None
self.valid_spec = False
return
self.matrix = np.delete(self.matrix, list(extraneous), axis=0)
self.matrix = np.delete(self.matrix, list(extraneous), axis=1)
for index in sorted(extraneous, reverse=True):
del self.ops[index]
def hash_spec(self, canonical_ops):
"""Computes the isomorphism-invariant graph hash of this spec.
Args:
canonical_ops: list of operations in the canonical ordering which they
were assigned (i.e. the order provided in the config['available_ops']).
Returns:
MD5 hash of this spec which can be used to query the dataset.
"""
# Invert the operations back to integer label indices used in graph gen.
labeling = [-1] + [canonical_ops.index(op) for op in self.ops[1:-1]] + [-2]
return graph_util.hash_module(self.matrix, labeling)
def visualize(self):
"""Creates a dot graph. Can be visualized in colab directly."""
num_vertices = np.shape(self.matrix)[0]
g = graphviz.Digraph()
g.node(str(0), 'input')
for v in range(1, num_vertices - 1):
g.node(str(v), self.ops[v])
g.node(str(num_vertices - 1), 'output')
for src in range(num_vertices - 1):
for dst in range(src + 1, num_vertices):
if self.matrix[src, dst]:
g.edge(str(src), str(dst))
return g
def is_upper_triangular(matrix):
"""True if matrix is 0 on diagonal and below."""
for src in range(np.shape(matrix)[0]):
for dst in range(0, src + 1):
if matrix[src, dst] != 0:
return False
return True
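# Illustrative sketch (not part of the original file): pruning in action. Vertices
# 2 and 3 below lie on no input->output path, so _prune() removes them and the
# spec collapses to a 3-vertex graph. The op labels follow the usual NAS-Bench-101
# naming and are used here purely for demonstration.
if __name__ == "__main__":
    demo_spec = ModelSpec(
        matrix=[[0, 1, 1, 0, 0],
                [0, 0, 0, 0, 1],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0]],
        ops=['input', 'conv3x3-bn-relu', 'conv1x1-bn-relu', 'maxpool3x3', 'output'])
    print(demo_spec.valid_spec)    # True
    print(demo_spec.matrix.shape)  # (3, 3) after pruning
    print(demo_spec.ops)           # ['input', 'conv3x3-bn-relu', 'output']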

View File

@@ -0,0 +1,361 @@
from models import get_cell_based_tiny_net, get_search_spaces
from nas_201_api import NASBench201API as API
from nasbench import api as nasbench101api
from nas_101_api.model import Network
from nas_101_api.model_spec import ModelSpec
import itertools
import random
import numpy as np
from models.cell_searchs.genotypes import Structure
from copy import deepcopy
from pycls.models.nas.nas import NetworkImageNet, NetworkCIFAR
from pycls.models.anynet import AnyNet
from pycls.models.nas.genotypes import GENOTYPES, Genotype
import json
import torch
class Nasbench201:
def __init__(self, dataset, apiloc):
self.dataset = dataset
self.api = API(apiloc, verbose=False)
self.epochs = '12'
def get_network(self, uid):
#config = self.api.get_net_config(uid, self.dataset)
config = self.api.get_net_config(uid, 'cifar10-valid')
print(config)
config['num_classes'] = 1
network = get_cell_based_tiny_net(config)
return network
def __iter__(self):
for uid in range(len(self)):
network = self.get_network(uid)
yield uid, network
def __getitem__(self, index):
return index
def __len__(self):
return 15625
def num_activations(self):
network = self.get_network(0)
return network.classifier.in_features
#def get_12epoch_accuracy(self, uid, acc_type, trainval, traincifar10=False):
# archinfo = self.api.query_meta_info_by_index(uid)
# if (self.dataset == 'cifar10' or traincifar10) and trainval:
# #return archinfo.get_metrics('cifar10-valid', acc_type, iepoch=12)['accuracy']
# return archinfo.get_metrics('cifar10-valid', 'x-valid', iepoch=12)['accuracy']
# elif traincifar10:
# return archinfo.get_metrics('cifar10', acc_type, iepoch=12)['accuracy']
# else:
# return archinfo.get_metrics(self.dataset, 'ori-test', iepoch=12)['accuracy']
def get_12epoch_accuracy(self, uid, acc_type, trainval, traincifar10=False):
#archinfo = self.api.query_meta_info_by_index(uid)
#if (self.dataset == 'cifar10' and trainval) or traincifar10:
info = self.api.get_more_info(uid, 'cifar10-valid', iepoch=None, hp=self.epochs, is_random=True)
#else:
# info = self.api.get_more_info(uid, self.dataset, iepoch=None, hp=self.epochs, is_random=True)
return info['valid-accuracy']
def get_final_accuracy(self, uid, acc_type, trainval):
#archinfo = self.api.query_meta_info_by_index(uid)
if self.dataset == 'cifar10' and trainval:
info = self.api.query_meta_info_by_index(uid, hp='200').get_metrics('cifar10-valid', 'x-valid')
#info = self.api.query_by_index(uid, 'cifar10-valid', hp='200')
#info = self.api.get_more_info(uid, 'cifar10-valid', iepoch=None, hp='200', is_random=True)
else:
info = self.api.query_meta_info_by_index(uid, hp='200').get_metrics(self.dataset, acc_type)
#info = self.api.query_by_index(uid, self.dataset, hp='200')
#info = self.api.get_more_info(uid, self.dataset, iepoch=None, hp='200', is_random=True)
return info['accuracy']
#return info['valid-accuracy']
#if self.dataset == 'cifar10' and trainval:
# return archinfo.get_metrics('cifar10-valid', acc_type, iepoch=11)['accuracy']
#else:
# #return archinfo.get_metrics(self.dataset, 'ori-test', iepoch=12)['accuracy']
# return archinfo.get_metrics(self.dataset, 'x-test', iepoch=11)['accuracy']
##dataset = self.dataset
##if self.dataset == 'cifar10' and trainval:
## dataset = 'cifar10-valid'
##archinfo = self.api.get_more_info(uid, dataset, iepoch=None, use_12epochs_result=True, is_random=True)
##return archinfo['valid-accuracy']
def get_accuracy(self, uid, acc_type, trainval=True):
archinfo = self.api.query_meta_info_by_index(uid)
if self.dataset == 'cifar10' and trainval:
return archinfo.get_metrics('cifar10-valid', acc_type)['accuracy']
else:
return archinfo.get_metrics(self.dataset, acc_type)['accuracy']
def get_accuracy_for_all_datasets(self, uid):
archinfo = self.api.query_meta_info_by_index(uid,hp='200')
c10 = archinfo.get_metrics('cifar10', 'ori-test')['accuracy']
c10_val = archinfo.get_metrics('cifar10-valid', 'x-valid')['accuracy']
c100 = archinfo.get_metrics('cifar100', 'x-test')['accuracy']
c100_val = archinfo.get_metrics('cifar100', 'x-valid')['accuracy']
imagenet = archinfo.get_metrics('ImageNet16-120', 'x-test')['accuracy']
imagenet_val = archinfo.get_metrics('ImageNet16-120', 'x-valid')['accuracy']
return c10, c10_val, c100, c100_val, imagenet, imagenet_val
#def train_and_eval(self, arch, dataname, acc_type, trainval=True):
# unique_hash = self.__getitem__(arch)
# time = self.get_training_time(unique_hash)
# acc12 = self.get_12epoch_accuracy(unique_hash, acc_type, trainval)
# acc = self.get_final_accuracy(unique_hash, acc_type, trainval)
# return acc12, acc, time
def train_and_eval(self, arch, dataname, acc_type, trainval=True, traincifar10=False):
unique_hash = self.__getitem__(arch)
time = self.get_training_time(unique_hash)
acc12 = self.get_12epoch_accuracy(unique_hash, acc_type, trainval, traincifar10)
acc = self.get_final_accuracy(unique_hash, acc_type, trainval)
return acc12, acc, time
def random_arch(self):
return random.randint(0, len(self)-1)
def get_training_time(self, unique_hash):
#info = self.api.get_more_info(unique_hash, 'cifar10-valid' if self.dataset == 'cifar10' else self.dataset, iepoch=None, use_12epochs_result=True, is_random=True)
#info = self.api.get_more_info(unique_hash, 'cifar10-valid', iepoch=None, use_12epochs_result=True, is_random=True)
info = self.api.get_more_info(unique_hash, 'cifar10-valid', iepoch=None, hp='12', is_random=True)
return info['train-all-time'] + info['valid-per-time']
#if self.dataset == 'cifar10' and trainval:
# info = self.api.get_more_info(unique_hash, 'cifar10-valid', iepoch=None, hp=self.epochs, is_random=True)
#else:
# info = self.api.get_more_info(unique_hash, self.dataset, iepoch=None, hp=self.epochs, is_random=True)
##info = self.api.get_more_info(unique_hash, 'cifar10-valid', iepoch=None, use_12epochs_result=True, is_random=True)
#return info['train-all-time'] + info['valid-per-time']
def mutate_arch(self, arch):
op_names = get_search_spaces('cell', 'nas-bench-201')
#config = self.api.get_net_config(arch, self.dataset)
config = self.api.get_net_config(arch, 'cifar10-valid')
parent_arch = Structure(self.api.str2lists(config['arch_str']))
child_arch = deepcopy( parent_arch )
node_id = random.randint(0, len(child_arch.nodes)-1)
node_info = list( child_arch.nodes[node_id] )
snode_id = random.randint(0, len(node_info)-1)
xop = random.choice( op_names )
while xop == node_info[snode_id][0]:
xop = random.choice( op_names )
node_info[snode_id] = (xop, node_info[snode_id][1])
child_arch.nodes[node_id] = tuple( node_info )
arch_index = self.api.query_index_by_arch( child_arch )
return arch_index
class Nasbench101:
def __init__(self, dataset, apiloc, args):
self.dataset = dataset
self.api = nasbench101api.NASBench(apiloc)
self.args = args
def get_accuracy(self, unique_hash, acc_type, trainval=True):
spec = self.get_spec(unique_hash)
_, stats = self.api.get_metrics_from_spec(spec)
maxacc = 0.
for ep in stats:
for statmap in stats[ep]:
newacc = statmap['final_test_accuracy']
if newacc > maxacc:
maxacc = newacc
return maxacc
def get_final_accuracy(self, uid, acc_type, trainval):
return self.get_accuracy(uid, acc_type, trainval)
def get_training_time(self, unique_hash):
spec = self.get_spec(unique_hash)
_, stats = self.api.get_metrics_from_spec(spec)
maxacc = -1.
maxtime = 0.
for ep in stats:
for statmap in stats[ep]:
newacc = statmap['final_test_accuracy']
if newacc > maxacc:
maxacc = newacc
maxtime = statmap['final_training_time']
return maxtime
def get_network(self, unique_hash):
spec = self.get_spec(unique_hash)
network = Network(spec, self.args)
return network
def get_spec(self, unique_hash):
matrix = self.api.fixed_statistics[unique_hash]['module_adjacency']
operations = self.api.fixed_statistics[unique_hash]['module_operations']
spec = ModelSpec(matrix, operations)
return spec
def __iter__(self):
for unique_hash in self.api.hash_iterator():
network = self.get_network(unique_hash)
yield unique_hash, network
def __getitem__(self, index):
return next(itertools.islice(self.api.hash_iterator(), index, None))
def __len__(self):
return len(self.api.hash_iterator())
def num_activations(self):
for unique_hash in self.api.hash_iterator():
network = self.get_network(unique_hash)
return network.classifier.in_features
def train_and_eval(self, arch, dataname, acc_type, trainval=True, traincifar10=False):
unique_hash = self.__getitem__(arch)
time = 12. * self.get_training_time(unique_hash) / 108.
acc = self.get_accuracy(unique_hash, acc_type, trainval)
return acc, acc, time
def random_arch(self):
return random.randint(0, len(self)-1)
def mutate_arch(self, arch):
unique_hash = self.__getitem__(arch)
matrix = self.api.fixed_statistics[unique_hash]['module_adjacency']
operations = self.api.fixed_statistics[unique_hash]['module_operations']
coords = [ (i, j) for i in range(matrix.shape[0]) for j in range(i+1, matrix.shape[1])]
random.shuffle(coords)
# loop through candidate changes until we find one that's allowed
for i, j in coords:
# try the ops in a particular order
for k in [m for m in np.unique(matrix) if m != matrix[i, j]]:
newmatrix = matrix.copy()
newmatrix[i, j] = k
spec = ModelSpec(newmatrix, operations)
try:
newhash = self.api._hash_spec(spec)
if newhash in self.api.fixed_statistics:
return [n for n, m in enumerate(self.api.fixed_statistics.keys()) if m == newhash][0]
except:
pass
class ReturnFeatureLayer(torch.nn.Module):
def __init__(self, mod):
super(ReturnFeatureLayer, self).__init__()
self.mod = mod
def forward(self, x):
return self.mod(x), x
def return_feature_layer(network, prefix=''):
#for attr_str in dir(network):
# target_attr = getattr(network, attr_str)
# if isinstance(target_attr, torch.nn.Linear):
# setattr(network, attr_str, ReturnFeatureLayer(target_attr))
for n, ch in list(network.named_children()):
if isinstance(ch, torch.nn.Linear):
setattr(network, n, ReturnFeatureLayer(ch))
else:
return_feature_layer(ch, prefix + '\t')
class NDS:
def __init__(self, searchspace):
self.searchspace = searchspace
data = json.load(open(f'nds_data/{searchspace}.json', 'r'))
try:
data = data['top'] + data['mid']
except Exception as e:
pass
self.data = data
def __iter__(self):
for unique_hash in range(len(self)):
network = self.get_network(unique_hash)
yield unique_hash, network
def get_network_config(self, uid):
return self.data[uid]['net']
def get_network_optim_config(self, uid):
return self.data[uid]['optim']
def get_network(self, uid):
netinfo = self.data[uid]
config = netinfo['net']
#print(config)
if 'genotype' in config:
#print('geno')
gen = config['genotype']
genotype = Genotype(normal=gen['normal'], normal_concat=gen['normal_concat'], reduce=gen['reduce'], reduce_concat=gen['reduce_concat'])
if '_in' in self.searchspace:
network = NetworkImageNet(config['width'], 1, config['depth'], config['aux'], genotype)
else:
network = NetworkCIFAR(config['width'], 1, config['depth'], config['aux'], genotype)
network.drop_path_prob = 0.
#print(config)
#print('genotype')
L = config['depth']
else:
if 'bot_muls' in config and 'bms' not in config:
config['bms'] = config['bot_muls']
del config['bot_muls']
if 'num_gs' in config and 'gws' not in config:
config['gws'] = config['num_gs']
del config['num_gs']
config['nc'] = 1
config['se_r'] = None
config['stem_w'] = 12
L = sum(config['ds'])
if 'ResN' in self.searchspace:
config['stem_type'] = 'res_stem_in'
else:
config['stem_type'] = 'simple_stem_in'
#"res_stem_cifar": ResStemCifar,
#"res_stem_in": ResStemIN,
#"simple_stem_in": SimpleStemIN,
if config['block_type'] == 'double_plain_block':
config['block_type'] = 'vanilla_block'
network = AnyNet(**config)
return_feature_layer(network)
return network
def __getitem__(self, index):
return index
def __len__(self):
return len(self.data)
def random_arch(self):
return random.randint(0, len(self.data)-1)
def get_final_accuracy(self, uid, acc_type, trainval):
return 100.-self.data[uid]['test_ep_top1'][-1]
def get_search_space(args):
if args.nasspace == 'nasbench201':
return Nasbench201(args.dataset, args.api_loc)
elif args.nasspace == 'nasbench101':
return Nasbench101(args.dataset, args.api_loc, args)
elif args.nasspace == 'nds_resnet':
return NDS('ResNet')
elif args.nasspace == 'nds_amoeba':
return NDS('Amoeba')
elif args.nasspace == 'nds_amoeba_in':
return NDS('Amoeba_in')
elif args.nasspace == 'nds_darts_in':
return NDS('DARTS_in')
elif args.nasspace == 'nds_darts':
return NDS('DARTS')
elif args.nasspace == 'nds_darts_fix-w-d':
return NDS('DARTS_fix-w-d')
elif args.nasspace == 'nds_darts_lr-wd':
return NDS('DARTS_lr-wd')
elif args.nasspace == 'nds_enas':
return NDS('ENAS')
elif args.nasspace == 'nds_enas_in':
return NDS('ENAS_in')
elif args.nasspace == 'nds_enas_fix-w-d':
return NDS('ENAS_fix-w-d')
elif args.nasspace == 'nds_pnas':
return NDS('PNAS')
elif args.nasspace == 'nds_pnas_fix-w-d':
return NDS('PNAS_fix-w-d')
elif args.nasspace == 'nds_pnas_in':
return NDS('PNAS_in')
elif args.nasspace == 'nds_nasnet':
return NDS('NASNet')
elif args.nasspace == 'nds_nasnet_in':
return NDS('NASNet_in')
elif args.nasspace == 'nds_resnext-a':
return NDS('ResNeXt-A')
elif args.nasspace == 'nds_resnext-a_in':
return NDS('ResNeXt-A_in')
elif args.nasspace == 'nds_resnext-b':
return NDS('ResNeXt-B')
elif args.nasspace == 'nds_resnext-b_in':
return NDS('ResNeXt-B_in')
elif args.nasspace == 'nds_vanilla':
return NDS('Vanilla')
elif args.nasspace == 'nds_vanilla_lr-wd':
return NDS('Vanilla_lr-wd')
elif args.nasspace == 'nds_vanilla_lr-wd_in':
return NDS('Vanilla_lr-wd_in')
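# Illustrative sketch (not part of the original file): constructing a search space
# and sampling an architecture from it. The api_loc path below is a hypothetical
# placeholder; a real NAS-Bench-201 benchmark file is required for this to run.
if __name__ == "__main__":
    from argparse import Namespace
    demo_args = Namespace(nasspace='nasbench201',
                          dataset='cifar10',
                          api_loc='NAS-Bench-201-v1_0-e61699.pth')
    searchspace = get_search_space(demo_args)
    print(len(searchspace))                  # 15625 candidate cells
    uid = searchspace.random_arch()
    network = searchspace.get_network(uid)   # cell-based tiny net with num_classes=1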

View File

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Benchmarking functions."""
import pycls.core.logging as logging
import pycls.datasets.loader as loader
import torch
from pycls.core.config import cfg
from pycls.core.timer import Timer
logger = logging.get_logger(__name__)
@torch.no_grad()
def compute_time_eval(model):
"""Computes precise model forward test time using dummy data."""
# Use eval mode
model.eval()
# Generate a dummy mini-batch and copy data to GPU
im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS)
if cfg.TASK == "jig":
inputs = torch.rand(batch_size, cfg.JIGSAW_GRID ** 2, cfg.MODEL.INPUT_CHANNELS, im_size, im_size).cuda(non_blocking=False)
else:
inputs = torch.zeros(batch_size, cfg.MODEL.INPUT_CHANNELS, im_size, im_size).cuda(non_blocking=False)
# Compute precise forward pass time
timer = Timer()
total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
for cur_iter in range(total_iter):
# Reset the timers after the warmup phase
if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
timer.reset()
# Forward
timer.tic()
model(inputs)
torch.cuda.synchronize()
timer.toc()
return timer.average_time
def compute_time_train(model, loss_fun):
"""Computes precise model forward + backward time using dummy data."""
# Use train mode
model.train()
# Generate a dummy mini-batch and copy data to GPU
im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS)
if cfg.TASK == "jig":
inputs = torch.rand(batch_size, cfg.JIGSAW_GRID ** 2, cfg.MODEL.INPUT_CHANNELS, im_size, im_size).cuda(non_blocking=False)
else:
inputs = torch.rand(batch_size, cfg.MODEL.INPUT_CHANNELS, im_size, im_size).cuda(non_blocking=False)
if cfg.TASK in ['col', 'seg']:
labels = torch.zeros(batch_size, im_size, im_size, dtype=torch.int64).cuda(non_blocking=False)
else:
labels = torch.zeros(batch_size, dtype=torch.int64).cuda(non_blocking=False)
# Cache BatchNorm2D running stats
bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)]
bn_stats = [[bn.running_mean.clone(), bn.running_var.clone()] for bn in bns]
# Compute precise forward backward pass time
fw_timer, bw_timer = Timer(), Timer()
total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
for cur_iter in range(total_iter):
# Reset the timers after the warmup phase
if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
fw_timer.reset()
bw_timer.reset()
# Forward
fw_timer.tic()
preds = model(inputs)
if isinstance(preds, tuple):
loss = loss_fun(preds[0], labels) + cfg.NAS.AUX_WEIGHT * loss_fun(preds[1], labels)
preds = preds[0]
else:
loss = loss_fun(preds, labels)
torch.cuda.synchronize()
fw_timer.toc()
# Backward
bw_timer.tic()
loss.backward()
torch.cuda.synchronize()
bw_timer.toc()
# Restore BatchNorm2D running stats
for bn, (mean, var) in zip(bns, bn_stats):
bn.running_mean, bn.running_var = mean, var
return fw_timer.average_time, bw_timer.average_time
def compute_time_loader(data_loader):
"""Computes loader time."""
timer = Timer()
loader.shuffle(data_loader, 0)
data_loader_iterator = iter(data_loader)
total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
total_iter = min(total_iter, len(data_loader))
for cur_iter in range(total_iter):
if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
timer.reset()
timer.tic()
next(data_loader_iterator)
timer.toc()
return timer.average_time
def compute_time_full(model, loss_fun, train_loader, test_loader):
"""Times model and data loader."""
logger.info("Computing model and loader timings...")
# Compute timings
test_fw_time = compute_time_eval(model)
train_fw_time, train_bw_time = compute_time_train(model, loss_fun)
train_fw_bw_time = train_fw_time + train_bw_time
train_loader_time = compute_time_loader(train_loader)
# Output iter timing
iter_times = {
"test_fw_time": test_fw_time,
"train_fw_time": train_fw_time,
"train_bw_time": train_bw_time,
"train_fw_bw_time": train_fw_bw_time,
"train_loader_time": train_loader_time,
}
logger.info(logging.dump_log_data(iter_times, "iter_times"))
# Output epoch timing
epoch_times = {
"test_fw_time": test_fw_time * len(test_loader),
"train_fw_time": train_fw_time * len(train_loader),
"train_bw_time": train_bw_time * len(train_loader),
"train_fw_bw_time": train_fw_bw_time * len(train_loader),
"train_loader_time": train_loader_time * len(train_loader),
}
logger.info(logging.dump_log_data(epoch_times, "epoch_times"))
# Compute data loader overhead (assuming DATA_LOADER.NUM_WORKERS>1)
overhead = max(0, train_loader_time - train_fw_bw_time) / train_fw_bw_time
logger.info("Overhead of data loader is {:.2f}%".format(overhead * 100))

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Model and loss construction functions."""
import torch
from pycls.core.config import cfg
from pycls.models.anynet import AnyNet
from pycls.models.effnet import EffNet
from pycls.models.regnet import RegNet
from pycls.models.resnet import ResNet
from pycls.models.nas.nas import NAS
from pycls.models.nas.nas_search import NAS_Search
from pycls.models.nas_bench.model_builder import NAS_Bench
class LabelSmoothedCrossEntropyLoss(torch.nn.Module):
"""CrossEntropyLoss with label smoothing."""
def __init__(self):
super(LabelSmoothedCrossEntropyLoss, self).__init__()
self.eps = cfg.MODEL.LABEL_SMOOTHING_EPS
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, logits, target):
pred = logits.log_softmax(dim=-1)
with torch.no_grad():
target_dist = torch.ones_like(pred) * self.eps / (self.num_classes - 1)
target_dist.scatter_(-1, target.unsqueeze(-1), 1 - self.eps)
return (-target_dist * pred).sum(dim=-1).mean()
# Supported models
_models = {
"anynet": AnyNet,
"effnet": EffNet,
"resnet": ResNet,
"regnet": RegNet,
"nas": NAS,
"nas_search": NAS_Search,
"nas_bench": NAS_Bench,
}
# Supported loss functions
_loss_funs = {
"cross_entropy": torch.nn.CrossEntropyLoss,
"label_smoothed_cross_entropy": LabelSmoothedCrossEntropyLoss,
}
def get_model():
"""Gets the model class specified in the config."""
err_str = "Model type '{}' not supported"
assert cfg.MODEL.TYPE in _models.keys(), err_str.format(cfg.MODEL.TYPE)
return _models[cfg.MODEL.TYPE]
def get_loss_fun():
"""Gets the loss function class specified in the config."""
err_str = "Loss function type '{}' not supported"
assert cfg.MODEL.LOSS_FUN in _loss_funs.keys(), err_str.format(cfg.MODEL.LOSS_FUN)
return _loss_funs[cfg.MODEL.LOSS_FUN]
def build_model():
"""Builds the model."""
return get_model()()
def build_loss_fun():
"""Build the loss function."""
if cfg.TASK == "seg":
return get_loss_fun()(ignore_index=255)
else:
return get_loss_fun()()
def register_model(name, ctor):
"""Registers a model dynamically."""
_models[name] = ctor
def register_loss_fun(name, ctor):
"""Registers a loss function dynamically."""
_loss_funs[name] = ctor
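# Illustrative sketch (not part of the original file): registering a custom model
# and selecting it through the global config. MyTinyNet is a hypothetical class;
# in a real run cfg.MODEL.TYPE would normally come from the YAML config file.
if __name__ == "__main__":
    class MyTinyNet(torch.nn.Module):
        def __init__(self):
            super(MyTinyNet, self).__init__()
            self.fc = torch.nn.Linear(32, cfg.MODEL.NUM_CLASSES)
        def forward(self, x):
            return self.fc(x)

    register_model("my_tiny_net", MyTinyNet)
    cfg.MODEL.TYPE = "my_tiny_net"
    model = build_model()
    loss_fun = build_loss_fun()  # CrossEntropyLoss by default
    print(type(model).__name__, type(loss_fun).__name__)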

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Functions that handle saving and loading of checkpoints."""
import os
import pycls.core.distributed as dist
import torch
from pycls.core.config import cfg
# Common prefix for checkpoint file names
_NAME_PREFIX = "model_epoch_"
# Checkpoints directory name
_DIR_NAME = "checkpoints"
def get_checkpoint_dir():
"""Retrieves the location for storing checkpoints."""
return os.path.join(cfg.OUT_DIR, _DIR_NAME)
def get_checkpoint(epoch):
"""Retrieves the path to a checkpoint file."""
name = "{}{:04d}.pyth".format(_NAME_PREFIX, epoch)
return os.path.join(get_checkpoint_dir(), name)
def get_last_checkpoint():
"""Retrieves the most recent checkpoint (highest epoch number)."""
checkpoint_dir = get_checkpoint_dir()
# Checkpoint file names are in lexicographic order
checkpoints = [f for f in os.listdir(checkpoint_dir) if _NAME_PREFIX in f]
last_checkpoint_name = sorted(checkpoints)[-1]
return os.path.join(checkpoint_dir, last_checkpoint_name)
def has_checkpoint():
"""Determines if there are checkpoints available."""
checkpoint_dir = get_checkpoint_dir()
if not os.path.exists(checkpoint_dir):
return False
return any(_NAME_PREFIX in f for f in os.listdir(checkpoint_dir))
def save_checkpoint(model, optimizer, epoch):
"""Saves a checkpoint."""
# Save checkpoints only from the master process
if not dist.is_master_proc():
return
# Ensure that the checkpoint dir exists
os.makedirs(get_checkpoint_dir(), exist_ok=True)
# Omit the DDP wrapper in the multi-gpu setting
sd = model.module.state_dict() if cfg.NUM_GPUS > 1 else model.state_dict()
# Record the state
if isinstance(optimizer, list):
checkpoint = {
"epoch": epoch,
"model_state": sd,
"optimizer_w_state": optimizer[0].state_dict(),
"optimizer_a_state": optimizer[1].state_dict(),
"cfg": cfg.dump(),
}
else:
checkpoint = {
"epoch": epoch,
"model_state": sd,
"optimizer_state": optimizer.state_dict(),
"cfg": cfg.dump(),
}
# Write the checkpoint
checkpoint_file = get_checkpoint(epoch + 1)
torch.save(checkpoint, checkpoint_file)
return checkpoint_file
def load_checkpoint(checkpoint_file, model, optimizer=None):
"""Loads the checkpoint from the given file."""
err_str = "Checkpoint '{}' not found"
assert os.path.exists(checkpoint_file), err_str.format(checkpoint_file)
# Load the checkpoint on CPU to avoid GPU mem spike
checkpoint = torch.load(checkpoint_file, map_location="cpu")
# Account for the DDP wrapper in the multi-gpu setting
ms = model.module if cfg.NUM_GPUS > 1 else model
ms.load_state_dict(checkpoint["model_state"])
# Load the optimizer state (commonly not done when fine-tuning)
if optimizer:
if isinstance(optimizer, list):
optimizer[0].load_state_dict(checkpoint["optimizer_w_state"])
optimizer[1].load_state_dict(checkpoint["optimizer_a_state"])
else:
optimizer.load_state_dict(checkpoint["optimizer_state"])
return checkpoint["epoch"]

View File

@@ -0,0 +1,500 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Configuration file (powered by YACS)."""
import argparse
import os
import sys
from pycls.core.io import cache_url
from yacs.config import CfgNode as CfgNode
# Global config object
_C = CfgNode()
# Example usage:
# from core.config import cfg
cfg = _C
# ------------------------------------------------------------------------------------ #
# Model options
# ------------------------------------------------------------------------------------ #
_C.MODEL = CfgNode()
# Model type
_C.MODEL.TYPE = ""
# Number of weight layers
_C.MODEL.DEPTH = 0
# Number of input channels
_C.MODEL.INPUT_CHANNELS = 3
# Number of classes
_C.MODEL.NUM_CLASSES = 10
# Loss function (see pycls/core/builders.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"
# Label smoothing eps
_C.MODEL.LABEL_SMOOTHING_EPS = 0.0
# ASPP channels
_C.MODEL.ASPP_CHANNELS = 256
# ASPP dilation rates
_C.MODEL.ASPP_RATES = [6, 12, 18]
# ------------------------------------------------------------------------------------ #
# ResNet options
# ------------------------------------------------------------------------------------ #
_C.RESNET = CfgNode()
# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"
# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1
# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64
# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True
# ------------------------------------------------------------------------------------ #
# AnyNet options
# ------------------------------------------------------------------------------------ #
_C.ANYNET = CfgNode()
# Stem type
_C.ANYNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.ANYNET.STEM_W = 32
# Block type
_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block"
# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []
# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []
# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []
# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []
# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False
# SE ratio
_C.ANYNET.SE_R = 0.25
# ------------------------------------------------------------------------------------ #
# RegNet options
# ------------------------------------------------------------------------------------ #
_C.REGNET = CfgNode()
# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.REGNET.STEM_W = 32
# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"
# Stride of each stage
_C.REGNET.STRIDE = 2
# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25
# Depth
_C.REGNET.DEPTH = 10
# Initial width
_C.REGNET.W0 = 32
# Slope
_C.REGNET.WA = 5.0
# Quantization
_C.REGNET.WM = 2.5
# Group width
_C.REGNET.GROUP_W = 16
# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
# ------------------------------------------------------------------------------------ #
# EfficientNet options
# ------------------------------------------------------------------------------------ #
_C.EN = CfgNode()
# Stem width
_C.EN.STEM_W = 32
# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []
# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []
# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25
# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []
# Kernel sizes for each stage
_C.EN.KERNELS = []
# Head width
_C.EN.HEAD_W = 1280
# Drop connect ratio
_C.EN.DC_RATIO = 0.0
# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0
# ---------------------------------------------------------------------------- #
# NAS options
# ---------------------------------------------------------------------------- #
_C.NAS = CfgNode()
# Cell genotype
_C.NAS.GENOTYPE = 'nas'
# Custom genotype
_C.NAS.CUSTOM_GENOTYPE = []
# Base NAS width
_C.NAS.WIDTH = 16
# Total number of cells
_C.NAS.DEPTH = 20
# Auxiliary heads
_C.NAS.AUX = False
# Weight for auxiliary heads
_C.NAS.AUX_WEIGHT = 0.4
# Drop path probability
_C.NAS.DROP_PROB = 0.0
# Matrix in NAS Bench
_C.NAS.MATRIX = []
# Operations in NAS Bench
_C.NAS.OPS = []
# Number of stacks in NAS Bench
_C.NAS.NUM_STACKS = 3
# Number of modules per stack in NAS Bench
_C.NAS.NUM_MODULES_PER_STACK = 3
# ------------------------------------------------------------------------------------ #
# Batch norm options
# ------------------------------------------------------------------------------------ #
_C.BN = CfgNode()
# BN epsilon
_C.BN.EPS = 1e-5
# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1
# Precise BN stats
_C.BN.USE_PRECISE_STATS = False
_C.BN.NUM_SAMPLES_PRECISE = 1024
# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False
# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0
# ------------------------------------------------------------------------------------ #
# Optimizer options
# ------------------------------------------------------------------------------------ #
_C.OPTIM = CfgNode()
# Base learning rate
_C.OPTIM.BASE_LR = 0.1
# Learning rate policy select from {'cos', 'exp', 'steps'}
_C.OPTIM.LR_POLICY = "cos"
# Exponential decay factor
_C.OPTIM.GAMMA = 0.1
# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []
# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1
# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200
# Momentum
_C.OPTIM.MOMENTUM = 0.9
# Momentum dampening
_C.OPTIM.DAMPENING = 0.0
# Nesterov momentum
_C.OPTIM.NESTEROV = True
# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4
# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1
# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_EPOCHS = 0
# Update the learning rate per iter
_C.OPTIM.ITER_LR = False
# Base learning rate for arch
_C.OPTIM.ARCH_BASE_LR = 0.0003
# L2 regularization for arch
_C.OPTIM.ARCH_WEIGHT_DECAY = 0.001
# Optimizer for arch
_C.OPTIM.ARCH_OPTIM = 'adam'
# Epoch to start optimizing arch
_C.OPTIM.ARCH_EPOCH = 0.0
# ------------------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------------------ #
_C.TRAIN = CfgNode()
# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"
# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128
# Image size
_C.TRAIN.IM_SIZE = 224
# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1
# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1
# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True
# Weights to start training from
_C.TRAIN.WEIGHTS = ""
# Percentage of gray images in jig
_C.TRAIN.GRAY_PERCENTAGE = 0.0
# Portion to create trainA/trainB split
_C.TRAIN.PORTION = 1.0
# ------------------------------------------------------------------------------------ #
# Testing options
# ------------------------------------------------------------------------------------ #
_C.TEST = CfgNode()
# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"
# Total mini-batch size
_C.TEST.BATCH_SIZE = 200
# Image size
_C.TEST.IM_SIZE = 256
# Weights to use for testing
_C.TEST.WEIGHTS = ""
# ------------------------------------------------------------------------------------ #
# Common train/test data loader options
# ------------------------------------------------------------------------------------ #
_C.DATA_LOADER = CfgNode()
# Number of data loader workers per process
_C.DATA_LOADER.NUM_WORKERS = 8
# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
# ------------------------------------------------------------------------------------ #
# Memory options
# ------------------------------------------------------------------------------------ #
_C.MEM = CfgNode()
# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
# ------------------------------------------------------------------------------------ #
# CUDNN options
# ------------------------------------------------------------------------------------ #
_C.CUDNN = CfgNode()
# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True
# ------------------------------------------------------------------------------------ #
# Precise timing options
# ------------------------------------------------------------------------------------ #
_C.PREC_TIME = CfgNode()
# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3
# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30
# ------------------------------------------------------------------------------------ #
# Misc options
# ------------------------------------------------------------------------------------ #
# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1
# Task (cls, seg, rot, col, jig)
_C.TASK = "cls"
# Grid in Jigsaw (2, 3); no effect if TASK is not jig
_C.JIGSAW_GRID = 3
# Output directory
_C.OUT_DIR = "/tmp"
# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"
# RNG seed (note that non-determinism may still be present due to
# non-deterministic operator implementations in GPU operator libraries)
_C.RNG_SEED = 1
# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"
# Log period in iters
_C.LOG_PERIOD = 10
# Distributed backend
_C.DIST_BACKEND = "nccl"
# Hostname and port for initializing multi-process groups
_C.HOST = "localhost"
_C.PORT = 10001
# Models weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"
# ------------------------------------------------------------------------------------ #
# Deprecated keys
# ------------------------------------------------------------------------------------ #
_C.register_deprecated_key("PREC_TIME.BATCH_SIZE")
_C.register_deprecated_key("PREC_TIME.ENABLED")
def assert_and_infer_cfg(cache_urls=True):
"""Checks config values invariants."""
err_str = "The first lr step must start at 0"
assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str
data_splits = ["train", "val", "test"]
err_str = "Data split '{}' not supported"
assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT)
assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT)
err_str = "Mini-batch size should be a multiple of NUM_GPUS."
assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
err_str = "Precise BN stats computation not verified for > 1 GPU"
assert not _C.BN.USE_PRECISE_STATS or _C.NUM_GPUS == 1, err_str
err_str = "Log destination '{}' not supported"
assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST)
if cache_urls:
cache_cfg_urls()
def cache_cfg_urls():
"""Download URLs in config, cache them, and rewrite cfg to use cached file."""
_C.TRAIN.WEIGHTS = cache_url(_C.TRAIN.WEIGHTS, _C.DOWNLOAD_CACHE)
_C.TEST.WEIGHTS = cache_url(_C.TEST.WEIGHTS, _C.DOWNLOAD_CACHE)
def dump_cfg():
"""Dumps the config to the output directory."""
cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
with open(cfg_file, "w") as f:
_C.dump(stream=f)
def load_cfg(out_dir, cfg_dest="config.yaml"):
"""Loads config from specified output directory."""
cfg_file = os.path.join(out_dir, cfg_dest)
_C.merge_from_file(cfg_file)
def load_cfg_fom_args(description="Config file options."):
"""Load config from command line arguments and set any specified options."""
parser = argparse.ArgumentParser(description=description)
help_s = "Config file location"
parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str)
help_s = "See pycls/core/config.py for all options"
parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
_C.merge_from_file(args.cfg_file)
_C.merge_from_list(args.opts)
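# Illustrative sketch (not part of the original file): using the global cfg
# programmatically without a YAML file. Overrides go through merge_from_list and
# are then validated; the option values below are arbitrary examples.
if __name__ == "__main__":
    cfg.merge_from_list(["MODEL.TYPE", "anynet",
                         "TRAIN.DATASET", "cifar10",
                         "TRAIN.BATCH_SIZE", "256",
                         "NUM_GPUS", "1"])
    assert_and_infer_cfg(cache_urls=False)  # checks splits, batch sizes, log dest
    print(cfg.MODEL.TYPE, cfg.TRAIN.BATCH_SIZE)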

View File

@@ -0,0 +1,157 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Distributed helpers."""
import multiprocessing
import os
import signal
import threading
import traceback
import torch
from pycls.core.config import cfg
def is_master_proc():
"""Determines if the current process is the master process.
Master process is responsible for logging, writing and loading checkpoints. In
the multi GPU setting, we assign the master role to the rank 0 process. When
training using a single GPU, there is a single process which is considered master.
"""
return cfg.NUM_GPUS == 1 or torch.distributed.get_rank() == 0
def init_process_group(proc_rank, world_size):
"""Initializes the default process group."""
# Set the GPU to use
torch.cuda.set_device(proc_rank)
# Initialize the process group
torch.distributed.init_process_group(
backend=cfg.DIST_BACKEND,
init_method="tcp://{}:{}".format(cfg.HOST, cfg.PORT),
world_size=world_size,
rank=proc_rank,
)
def destroy_process_group():
"""Destroys the default process group."""
torch.distributed.destroy_process_group()
def scaled_all_reduce(tensors):
"""Performs the scaled all_reduce operation on the provided tensors.
The input tensors are modified in-place. Currently supports only the sum
reduction operator. The reduced values are scaled by the inverse size of the
process group (equivalent to cfg.NUM_GPUS).
"""
# There is no need for reduction in the single-proc case
if cfg.NUM_GPUS == 1:
return tensors
# Queue the reductions
reductions = []
for tensor in tensors:
reduction = torch.distributed.all_reduce(tensor, async_op=True)
reductions.append(reduction)
# Wait for reductions to finish
for reduction in reductions:
reduction.wait()
# Scale the results
for tensor in tensors:
tensor.mul_(1.0 / cfg.NUM_GPUS)
return tensors
class ChildException(Exception):
"""Wraps an exception from a child process."""
def __init__(self, child_trace):
super(ChildException, self).__init__(child_trace)
class ErrorHandler(object):
"""Multiprocessing error handler (based on fairseq's).
Listens for errors in child processes and propagates the tracebacks to the parent.
"""
def __init__(self, error_queue):
# Shared error queue
self.error_queue = error_queue
# Children processes sharing the error queue
self.children_pids = []
# Start a thread listening to errors
self.error_listener = threading.Thread(target=self.listen, daemon=True)
self.error_listener.start()
# Register the signal handler
signal.signal(signal.SIGUSR1, self.signal_handler)
def add_child(self, pid):
"""Registers a child process."""
self.children_pids.append(pid)
def listen(self):
"""Listens for errors in the error queue."""
# Wait until there is an error in the queue
child_trace = self.error_queue.get()
# Put the error back for the signal handler
self.error_queue.put(child_trace)
# Invoke the signal handler
os.kill(os.getpid(), signal.SIGUSR1)
def signal_handler(self, _sig_num, _stack_frame):
"""Signal handler."""
# Kill children processes
for pid in self.children_pids:
os.kill(pid, signal.SIGINT)
# Propagate the error from the child process
raise ChildException(self.error_queue.get())
def run(proc_rank, world_size, error_queue, fun, fun_args, fun_kwargs):
"""Runs a function from a child process."""
try:
# Initialize the process group
init_process_group(proc_rank, world_size)
# Run the function
fun(*fun_args, **fun_kwargs)
except KeyboardInterrupt:
# Killed by the parent process
pass
except Exception:
# Propagate exception to the parent process
error_queue.put(traceback.format_exc())
finally:
# Destroy the process group
destroy_process_group()
def multi_proc_run(num_proc, fun, fun_args=(), fun_kwargs=None):
"""Runs a function in a multi-proc setting (unless num_proc == 1)."""
# There is no need for multi-proc in the single-proc case
fun_kwargs = fun_kwargs if fun_kwargs else {}
if num_proc == 1:
fun(*fun_args, **fun_kwargs)
return
# Handle errors from training subprocesses
error_queue = multiprocessing.SimpleQueue()
error_handler = ErrorHandler(error_queue)
# Run each training subprocess
ps = []
for i in range(num_proc):
p_i = multiprocessing.Process(
target=run, args=(i, num_proc, error_queue, fun, fun_args, fun_kwargs)
)
ps.append(p_i)
p_i.start()
error_handler.add_child(p_i.pid)
# Wait for each subprocess to finish
for p in ps:
p.join()
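# Illustrative sketch (not part of the original file): running a trivial worker on
# every process via multi_proc_run. With cfg.NUM_GPUS == 1 the worker is called
# directly; otherwise one subprocess per GPU is launched and the process group is
# initialized first (which requires matching cfg.HOST/cfg.PORT and visible GPUs).
if __name__ == "__main__":
    def _demo_worker(message):
        print(message, "from pid", os.getpid())

    multi_proc_run(num_proc=cfg.NUM_GPUS, fun=_demo_worker, fun_args=("hello",))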

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""IO utilities (adapted from Detectron)"""
import logging
import os
import re
import sys
from urllib import request as urlrequest
logger = logging.getLogger(__name__)
_PYCLS_BASE_URL = "https://dl.fbaipublicfiles.com/pycls"
def cache_url(url_or_file, cache_dir):
"""Download the file specified by the URL to the cache_dir and return the path to
the cached file. If the argument is not a URL, simply return it as is.
"""
is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None
if not is_url:
return url_or_file
url = url_or_file
err_str = "pycls only automatically caches URLs in the pycls S3 bucket: {}"
assert url.startswith(_PYCLS_BASE_URL), err_str.format(_PYCLS_BASE_URL)
cache_file_path = url.replace(_PYCLS_BASE_URL, cache_dir)
if os.path.exists(cache_file_path):
return cache_file_path
cache_file_dir = os.path.dirname(cache_file_path)
if not os.path.exists(cache_file_dir):
os.makedirs(cache_file_dir)
logger.info("Downloading remote file {} to {}".format(url, cache_file_path))
download_url(url, cache_file_path)
return cache_file_path
def _progress_bar(count, total):
"""Report download progress. Credit:
https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
"""
bar_len = 60
filled_len = int(round(bar_len * count / float(total)))
percents = round(100.0 * count / float(total), 1)
bar = "=" * filled_len + "-" * (bar_len - filled_len)
sys.stdout.write(
" [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024)
)
sys.stdout.flush()
if count >= total:
sys.stdout.write("\n")
def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
"""Download url and write it to dst_file_path. Credit:
https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
"""
req = urlrequest.Request(url)
response = urlrequest.urlopen(req)
total_size = response.info().get("Content-Length").strip()
total_size = int(total_size)
bytes_so_far = 0
with open(dst_file_path, "wb") as f:
while 1:
chunk = response.read(chunk_size)
bytes_so_far += len(chunk)
if not chunk:
break
if progress_hook:
progress_hook(bytes_so_far, total_size)
f.write(chunk)
return bytes_so_far
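# Illustrative sketch (not part of the original file): cache_url is a no-op for
# local paths and only downloads files hosted under the pycls S3 bucket. The path
# below is a hypothetical placeholder.
if __name__ == "__main__":
    print(cache_url("/tmp/my_weights.pyth", "/tmp/pycls-download-cache"))
    # -> "/tmp/my_weights.pyth" (returned unchanged, nothing is downloaded)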

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Logging."""
import builtins
import decimal
import logging
import os
import sys
import pycls.core.distributed as dist
import simplejson
from pycls.core.config import cfg
# Show filename and line number in logs
_FORMAT = "[%(filename)s: %(lineno)3d]: %(message)s"
# Log file name (for cfg.LOG_DEST = 'file')
_LOG_FILE = "stdout.log"
# Data output with dump_log_data(data, data_type) will be tagged w/ this
_TAG = "json_stats: "
# Data output with dump_log_data(data, data_type) will have data[_TYPE]=data_type
_TYPE = "_type"
def _suppress_print():
"""Suppresses printing from the current process."""
def ignore(*_objects, _sep=" ", _end="\n", _file=sys.stdout, _flush=False):
pass
builtins.print = ignore
def setup_logging():
"""Sets up the logging."""
# Enable logging only for the master process
if dist.is_master_proc():
# Clear the root logger to prevent any existing logging config
# (e.g. set by another module) from messing with our setup
logging.root.handlers = []
# Construct logging configuration
logging_config = {"level": logging.INFO, "format": _FORMAT}
# Log either to stdout or to a file
if cfg.LOG_DEST == "stdout":
logging_config["stream"] = sys.stdout
else:
logging_config["filename"] = os.path.join(cfg.OUT_DIR, _LOG_FILE)
# Configure logging
logging.basicConfig(**logging_config)
else:
_suppress_print()
def get_logger(name):
"""Retrieves the logger."""
return logging.getLogger(name)
def dump_log_data(data, data_type, prec=4):
"""Covert data (a dictionary) into tagged json string for logging."""
data[_TYPE] = data_type
data = float_to_decimal(data, prec)
data_json = simplejson.dumps(data, sort_keys=True, use_decimal=True)
return "{:s}{:s}".format(_TAG, data_json)
def float_to_decimal(data, prec=4):
"""Convert floats to decimals which allows for fixed width json."""
if isinstance(data, dict):
return {k: float_to_decimal(v, prec) for k, v in data.items()}
if isinstance(data, float):
return decimal.Decimal(("{:." + str(prec) + "f}").format(data))
else:
return data
def get_log_files(log_dir, name_filter="", log_file=_LOG_FILE):
"""Get all log files in directory containing subdirs of trained models."""
names = [n for n in sorted(os.listdir(log_dir)) if name_filter in n]
files = [os.path.join(log_dir, n, log_file) for n in names]
f_n_ps = [(f, n) for (f, n) in zip(files, names) if os.path.exists(f)]
files, names = zip(*f_n_ps) if f_n_ps else ([], [])
return files, names
def load_log_data(log_file, data_types_to_skip=()):
"""Loads log data into a dictionary of the form data[data_type][metric][index]."""
# Load log_file
assert os.path.exists(log_file), "Log file not found: {}".format(log_file)
with open(log_file, "r") as f:
lines = f.readlines()
# Extract and parse lines that start with _TAG and have a type specified
lines = [l[l.find(_TAG) + len(_TAG) :] for l in lines if _TAG in l]
lines = [simplejson.loads(l) for l in lines]
lines = [l for l in lines if _TYPE in l and not l[_TYPE] in data_types_to_skip]
# Generate data structure accessed by data[data_type][index][metric]
data_types = [l[_TYPE] for l in lines]
data = {t: [] for t in data_types}
for t, line in zip(data_types, lines):
del line[_TYPE]
data[t].append(line)
# Generate data structure accessed by data[data_type][metric][index]
for t in data:
metrics = sorted(data[t][0].keys())
err_str = "Inconsistent metrics in log for _type={}: {}".format(t, metrics)
assert all(sorted(d.keys()) == metrics for d in data[t]), err_str
data[t] = {m: [d[m] for d in data[t]] for m in metrics}
return data
def sort_log_data(data):
"""Sort each data[data_type][metric] by epoch or keep only first instance."""
for t in data:
if "epoch" in data[t]:
assert "epoch_ind" not in data[t] and "epoch_max" not in data[t]
data[t]["epoch_ind"] = [int(e.split("/")[0]) for e in data[t]["epoch"]]
data[t]["epoch_max"] = [int(e.split("/")[1]) for e in data[t]["epoch"]]
epoch = data[t]["epoch_ind"]
if "iter" in data[t]:
assert "iter_ind" not in data[t] and "iter_max" not in data[t]
data[t]["iter_ind"] = [int(i.split("/")[0]) for i in data[t]["iter"]]
data[t]["iter_max"] = [int(i.split("/")[1]) for i in data[t]["iter"]]
itr = zip(epoch, data[t]["iter_ind"], data[t]["iter_max"])
epoch = [e + (i_ind - 1) / i_max for e, i_ind, i_max in itr]
for m in data[t]:
data[t][m] = [v for _, v in sorted(zip(epoch, data[t][m]))]
else:
data[t] = {m: d[0] for m, d in data[t].items()}
return data
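# Illustrative sketch (not part of the original file): emitting and inspecting a
# tagged json_stats line. The metric names are arbitrary; lines written this way
# to a log file can later be parsed back with load_log_data()/sort_log_data().
if __name__ == "__main__":
    setup_logging()
    logger = get_logger(__name__)
    logger.info(dump_log_data({"epoch": "1/10", "top1_err": 34.5678}, "test_epoch"))
    # logs something like: json_stats: {"_type": "test_epoch", "epoch": "1/10", ...}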

View File

@@ -0,0 +1,435 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Meters."""
from collections import deque
import numpy as np
import pycls.core.logging as logging
import torch
from pycls.core.config import cfg
from pycls.core.timer import Timer
logger = logging.get_logger(__name__)
def time_string(seconds):
"""Converts time in seconds to a fixed-width string format."""
days, rem = divmod(int(seconds), 24 * 3600)
hrs, rem = divmod(rem, 3600)
mins, secs = divmod(rem, 60)
return "{0:02},{1:02}:{2:02}:{3:02}".format(days, hrs, mins, secs)
def inter_union(preds, labels, num_classes):
_, preds = torch.max(preds, 1)
preds = preds.type(torch.uint8) + 1
labels = labels.type(torch.uint8) + 1
preds = preds * (labels > 0).type(torch.uint8)
inter = preds * (preds == labels).type(torch.uint8)
area_inter = torch.histc(inter.type(torch.int64), bins=num_classes, min=1, max=num_classes)
area_preds = torch.histc(preds.type(torch.int64), bins=num_classes, min=1, max=num_classes)
area_labels = torch.histc(labels.type(torch.int64), bins=num_classes, min=1, max=num_classes)
area_union = area_preds + area_labels - area_inter
return [area_inter.type(torch.float64) / labels.size(0), area_union.type(torch.float64) / labels.size(0)]
def topk_errors(preds, labels, ks):
"""Computes the top-k error for each k."""
err_str = "Batch dim of predictions and labels must match"
assert preds.size(0) == labels.size(0), err_str
# Find the top max_k predictions for each sample
_top_max_k_vals, top_max_k_inds = torch.topk(
preds, max(ks), dim=1, largest=True, sorted=True
)
# (batch_size, max_k) -> (max_k, batch_size)
top_max_k_inds = top_max_k_inds.t()
# (batch_size, ) -> (max_k, batch_size)
rep_max_k_labels = labels.view(1, -1).expand_as(top_max_k_inds)
# (i, j) = 1 if top i-th prediction for the j-th sample is correct
top_max_k_correct = top_max_k_inds.eq(rep_max_k_labels)
# Compute the number of topk correct predictions for each k
    topks_correct = [top_max_k_correct[:k, :].reshape(-1).float().sum() for k in ks]
return [(1.0 - x / preds.size(0)) * 100.0 for x in topks_correct]
def gpu_mem_usage():
"""Computes the GPU memory usage for the current device (MB)."""
mem_usage_bytes = torch.cuda.max_memory_allocated()
return mem_usage_bytes / 1024 / 1024
class ScalarMeter(object):
"""Measures a scalar value (adapted from Detectron)."""
def __init__(self, window_size):
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
def reset(self):
self.deque.clear()
self.total = 0.0
self.count = 0
def add_value(self, value):
self.deque.append(value)
self.count += 1
self.total += value
def get_win_median(self):
return np.median(self.deque)
def get_win_avg(self):
return np.mean(self.deque)
def get_global_avg(self):
return self.total / self.count
class TrainMeter(object):
"""Measures training stats."""
def __init__(self, epoch_iters):
self.epoch_iters = epoch_iters
self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
self.iter_timer = Timer()
self.loss = ScalarMeter(cfg.LOG_PERIOD)
self.loss_total = 0.0
self.lr = None
# Current minibatch errors (smoothed over a window)
self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
# Number of misclassified examples
self.num_top1_mis = 0
self.num_top5_mis = 0
self.num_samples = 0
def reset(self, timer=False):
if timer:
self.iter_timer.reset()
self.loss.reset()
self.loss_total = 0.0
self.lr = None
self.mb_top1_err.reset()
self.mb_top5_err.reset()
self.num_top1_mis = 0
self.num_top5_mis = 0
self.num_samples = 0
def iter_tic(self):
self.iter_timer.tic()
def iter_toc(self):
self.iter_timer.toc()
def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
# Current minibatch stats
self.mb_top1_err.add_value(top1_err)
self.mb_top5_err.add_value(top5_err)
self.loss.add_value(loss)
self.lr = lr
# Aggregate stats
self.num_top1_mis += top1_err * mb_size
self.num_top5_mis += top5_err * mb_size
self.loss_total += loss * mb_size
self.num_samples += mb_size
def get_iter_stats(self, cur_epoch, cur_iter):
cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
mem_usage = gpu_mem_usage()
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
"time_avg": self.iter_timer.average_time,
"time_diff": self.iter_timer.diff,
"eta": time_string(eta_sec),
"top1_err": self.mb_top1_err.get_win_median(),
"top5_err": self.mb_top5_err.get_win_median(),
"loss": self.loss.get_win_median(),
"lr": self.lr,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_iter_stats(self, cur_epoch, cur_iter):
if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
return
stats = self.get_iter_stats(cur_epoch, cur_iter)
logger.info(logging.dump_log_data(stats, "train_iter"))
def get_epoch_stats(self, cur_epoch):
cur_iter_total = (cur_epoch + 1) * self.epoch_iters
eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
mem_usage = gpu_mem_usage()
top1_err = self.num_top1_mis / self.num_samples
top5_err = self.num_top5_mis / self.num_samples
avg_loss = self.loss_total / self.num_samples
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"time_avg": self.iter_timer.average_time,
"eta": time_string(eta_sec),
"top1_err": top1_err,
"top5_err": top5_err,
"loss": avg_loss,
"lr": self.lr,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_epoch_stats(self, cur_epoch):
stats = self.get_epoch_stats(cur_epoch)
logger.info(logging.dump_log_data(stats, "train_epoch"))
class TestMeter(object):
"""Measures testing stats."""
def __init__(self, max_iter):
self.max_iter = max_iter
self.iter_timer = Timer()
# Current minibatch errors (smoothed over a window)
self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
# Min errors (over the full test set)
self.min_top1_err = 100.0
self.min_top5_err = 100.0
# Number of misclassified examples
self.num_top1_mis = 0
self.num_top5_mis = 0
self.num_samples = 0
def reset(self, min_errs=False):
if min_errs:
self.min_top1_err = 100.0
self.min_top5_err = 100.0
self.iter_timer.reset()
self.mb_top1_err.reset()
self.mb_top5_err.reset()
self.num_top1_mis = 0
self.num_top5_mis = 0
self.num_samples = 0
def iter_tic(self):
self.iter_timer.tic()
def iter_toc(self):
self.iter_timer.toc()
def update_stats(self, top1_err, top5_err, mb_size):
self.mb_top1_err.add_value(top1_err)
self.mb_top5_err.add_value(top5_err)
self.num_top1_mis += top1_err * mb_size
self.num_top5_mis += top5_err * mb_size
self.num_samples += mb_size
def get_iter_stats(self, cur_epoch, cur_iter):
mem_usage = gpu_mem_usage()
iter_stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"iter": "{}/{}".format(cur_iter + 1, self.max_iter),
"time_avg": self.iter_timer.average_time,
"time_diff": self.iter_timer.diff,
"top1_err": self.mb_top1_err.get_win_median(),
"top5_err": self.mb_top5_err.get_win_median(),
"mem": int(np.ceil(mem_usage)),
}
return iter_stats
def log_iter_stats(self, cur_epoch, cur_iter):
if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
return
stats = self.get_iter_stats(cur_epoch, cur_iter)
logger.info(logging.dump_log_data(stats, "test_iter"))
def get_epoch_stats(self, cur_epoch):
top1_err = self.num_top1_mis / self.num_samples
top5_err = self.num_top5_mis / self.num_samples
self.min_top1_err = min(self.min_top1_err, top1_err)
self.min_top5_err = min(self.min_top5_err, top5_err)
mem_usage = gpu_mem_usage()
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"time_avg": self.iter_timer.average_time,
"top1_err": top1_err,
"top5_err": top5_err,
"min_top1_err": self.min_top1_err,
"min_top5_err": self.min_top5_err,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_epoch_stats(self, cur_epoch):
stats = self.get_epoch_stats(cur_epoch)
logger.info(logging.dump_log_data(stats, "test_epoch"))
class TrainMeterIoU(object):
"""Measures training stats."""
def __init__(self, epoch_iters):
self.epoch_iters = epoch_iters
self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
self.iter_timer = Timer()
self.loss = ScalarMeter(cfg.LOG_PERIOD)
self.loss_total = 0.0
self.lr = None
self.mb_miou = ScalarMeter(cfg.LOG_PERIOD)
self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_samples = 0
def reset(self, timer=False):
if timer:
self.iter_timer.reset()
self.loss.reset()
self.loss_total = 0.0
self.lr = None
self.mb_miou.reset()
self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_samples = 0
def iter_tic(self):
self.iter_timer.tic()
def iter_toc(self):
self.iter_timer.toc()
def update_stats(self, inter, union, loss, lr, mb_size):
# Current minibatch stats
self.mb_miou.add_value((inter / (union + 1e-10)).mean())
self.loss.add_value(loss)
self.lr = lr
# Aggregate stats
self.num_inter += inter * mb_size
self.num_union += union * mb_size
self.loss_total += loss * mb_size
self.num_samples += mb_size
def get_iter_stats(self, cur_epoch, cur_iter):
cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
mem_usage = gpu_mem_usage()
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
"time_avg": self.iter_timer.average_time,
"time_diff": self.iter_timer.diff,
"eta": time_string(eta_sec),
"miou": self.mb_miou.get_win_median(),
"loss": self.loss.get_win_median(),
"lr": self.lr,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_iter_stats(self, cur_epoch, cur_iter):
if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
return
stats = self.get_iter_stats(cur_epoch, cur_iter)
logger.info(logging.dump_log_data(stats, "train_iter"))
def get_epoch_stats(self, cur_epoch):
cur_iter_total = (cur_epoch + 1) * self.epoch_iters
eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
mem_usage = gpu_mem_usage()
miou = (self.num_inter / (self.num_union + 1e-10)).mean()
avg_loss = self.loss_total / self.num_samples
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"time_avg": self.iter_timer.average_time,
"eta": time_string(eta_sec),
"miou": miou,
"loss": avg_loss,
"lr": self.lr,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_epoch_stats(self, cur_epoch):
stats = self.get_epoch_stats(cur_epoch)
logger.info(logging.dump_log_data(stats, "train_epoch"))
class TestMeterIoU(object):
"""Measures testing stats."""
def __init__(self, max_iter):
self.max_iter = max_iter
self.iter_timer = Timer()
self.mb_miou = ScalarMeter(cfg.LOG_PERIOD)
self.max_miou = 0.0
self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_samples = 0
def reset(self, min_errs=False):
if min_errs:
self.max_miou = 0.0
self.iter_timer.reset()
self.mb_miou.reset()
self.num_inter = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_union = np.zeros(cfg.MODEL.NUM_CLASSES)
self.num_samples = 0
def iter_tic(self):
self.iter_timer.tic()
def iter_toc(self):
self.iter_timer.toc()
def update_stats(self, inter, union, mb_size):
self.mb_miou.add_value((inter / (union + 1e-10)).mean())
self.num_inter += inter * mb_size
self.num_union += union * mb_size
self.num_samples += mb_size
def get_iter_stats(self, cur_epoch, cur_iter):
mem_usage = gpu_mem_usage()
iter_stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"iter": "{}/{}".format(cur_iter + 1, self.max_iter),
"time_avg": self.iter_timer.average_time,
"time_diff": self.iter_timer.diff,
"miou": self.mb_miou.get_win_median(),
"mem": int(np.ceil(mem_usage)),
}
return iter_stats
def log_iter_stats(self, cur_epoch, cur_iter):
if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
return
stats = self.get_iter_stats(cur_epoch, cur_iter)
logger.info(logging.dump_log_data(stats, "test_iter"))
def get_epoch_stats(self, cur_epoch):
miou = (self.num_inter / (self.num_union + 1e-10)).mean()
self.max_miou = max(self.max_miou, miou)
mem_usage = gpu_mem_usage()
stats = {
"epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
"time_avg": self.iter_timer.average_time,
"miou": miou,
"max_miou": self.max_miou,
"mem": int(np.ceil(mem_usage)),
}
return stats
def log_epoch_stats(self, cur_epoch):
stats = self.get_epoch_stats(cur_epoch)
logger.info(logging.dump_log_data(stats, "test_epoch"))
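For intuition, a tiny hand-checkable example of topk_errors from this file; the tensors are made up purely for illustration.
import torch
preds = torch.tensor([[0.1, 0.7, 0.2],   # sample 0: top-1 = class 1, top-2 = {1, 2}
                      [0.6, 0.3, 0.1]])  # sample 1: top-1 = class 0, top-2 = {0, 1}
labels = torch.tensor([1, 2])
top1_err, top2_err = topk_errors(preds, labels, ks=[1, 2])
# Sample 0 is correct at k=1; sample 1's label (class 2) is outside its top-2,
# so both error rates come out as 50.0 (percent).
print(top1_err.item(), top2_err.item())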

View File

@@ -0,0 +1,129 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Functions for manipulating networks."""
import itertools
import math
import torch
import torch.nn as nn
from pycls.core.config import cfg
def init_weights(m):
"""Performs ResNet-style weight initialization."""
if isinstance(m, nn.Conv2d):
# Note that there is no bias due to BN
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
elif isinstance(m, nn.BatchNorm2d):
zero_init_gamma = cfg.BN.ZERO_INIT_FINAL_GAMMA
zero_init_gamma = hasattr(m, "final_bn") and m.final_bn and zero_init_gamma
m.weight.data.fill_(0.0 if zero_init_gamma else 1.0)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(mean=0.0, std=0.01)
m.bias.data.zero_()
@torch.no_grad()
def compute_precise_bn_stats(model, loader):
"""Computes precise BN stats on training data."""
# Compute the number of minibatches to use
num_iter = min(cfg.BN.NUM_SAMPLES_PRECISE // loader.batch_size, len(loader))
# Retrieve the BN layers
bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)]
# Initialize stats storage
mus = [torch.zeros_like(bn.running_mean) for bn in bns]
sqs = [torch.zeros_like(bn.running_var) for bn in bns]
# Remember momentum values
moms = [bn.momentum for bn in bns]
# Disable momentum
for bn in bns:
bn.momentum = 1.0
# Accumulate the stats across the data samples
for inputs, _labels in itertools.islice(loader, num_iter):
model(inputs.cuda())
# Accumulate the stats for each BN layer
for i, bn in enumerate(bns):
m, v = bn.running_mean, bn.running_var
sqs[i] += (v + m * m) / num_iter
mus[i] += m / num_iter
# Set the stats and restore momentum values
for i, bn in enumerate(bns):
bn.running_var = sqs[i] - mus[i] * mus[i]
bn.running_mean = mus[i]
bn.momentum = moms[i]
def reset_bn_stats(model):
"""Resets running BN stats."""
for m in model.modules():
if isinstance(m, torch.nn.BatchNorm2d):
m.reset_running_stats()
def complexity_conv2d(cx, w_in, w_out, k, stride, padding, groups=1, bias=False):
"""Accumulates complexity of Conv2D into cx = (h, w, flops, params, acts)."""
h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"]
h = (h + 2 * padding - k) // stride + 1
w = (w + 2 * padding - k) // stride + 1
flops += k * k * w_in * w_out * h * w // groups
params += k * k * w_in * w_out // groups
flops += w_out if bias else 0
params += w_out if bias else 0
acts += w_out * h * w
return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts}
def complexity_batchnorm2d(cx, w_in):
"""Accumulates complexity of BatchNorm2D into cx = (h, w, flops, params, acts)."""
h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"]
params += 2 * w_in
return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts}
def complexity_maxpool2d(cx, k, stride, padding):
"""Accumulates complexity of MaxPool2d into cx = (h, w, flops, params, acts)."""
h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"]
h = (h + 2 * padding - k) // stride + 1
w = (w + 2 * padding - k) // stride + 1
return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts}
def complexity(model):
"""Compute model complexity (model can be model instance or model class)."""
size = cfg.TRAIN.IM_SIZE
cx = {"h": size, "w": size, "flops": 0, "params": 0, "acts": 0}
cx = model.complexity(cx)
return {"flops": cx["flops"], "params": cx["params"], "acts": cx["acts"]}
def drop_connect(x, drop_ratio):
"""Drop connect (adapted from DARTS)."""
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
def get_flat_weights(model):
"""Gets all model weights as a single flat vector."""
return torch.cat([p.data.view(-1, 1) for p in model.parameters()], 0)
def set_flat_weights(model, flat_weights):
"""Sets all model weights from a single flat vector."""
k = 0
for p in model.parameters():
n = p.data.numel()
p.data.copy_(flat_weights[k : (k + n)].view_as(p.data))
k += n
assert k == flat_weights.numel()
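As a quick sanity check of the complexity accounting above, consider one 3x3 stride-1 convolution from 3 to 16 channels on a 32x32 input (numbers chosen for illustration only):
cx = {"h": 32, "w": 32, "flops": 0, "params": 0, "acts": 0}
cx = complexity_conv2d(cx, w_in=3, w_out=16, k=3, stride=1, padding=1)
# Output stays 32x32: (32 + 2*1 - 3) // 1 + 1 = 32
# params = 3*3*3*16 = 432; flops = 432 * 32 * 32 = 442,368; acts = 16 * 32 * 32 = 16,384
assert cx["params"] == 432 and cx["flops"] == 442368 and cx["acts"] == 16384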

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Optimizer."""
import numpy as np
import torch
from pycls.core.config import cfg
def construct_optimizer(model):
"""Constructs the optimizer.
Note that the momentum update in PyTorch differs from the one in Caffe2.
In particular,
Caffe2:
V := mu * V + lr * g
p := p - V
PyTorch:
V := mu * V + g
p := p - lr * V
where V is the velocity, mu is the momentum factor, lr is the learning rate,
g is the gradient and p are the parameters.
Since V is defined independently of the learning rate in PyTorch,
when the learning rate is changed there is no need to perform the
momentum correction by scaling V (unlike in the Caffe2 case).
"""
if cfg.BN.USE_CUSTOM_WEIGHT_DECAY:
# Apply different weight decay to Batchnorm and non-batchnorm parameters.
p_bn = [p for n, p in model.named_parameters() if "bn" in n]
p_non_bn = [p for n, p in model.named_parameters() if "bn" not in n]
optim_params = [
{"params": p_bn, "weight_decay": cfg.BN.CUSTOM_WEIGHT_DECAY},
{"params": p_non_bn, "weight_decay": cfg.OPTIM.WEIGHT_DECAY},
]
else:
optim_params = model.parameters()
return torch.optim.SGD(
optim_params,
lr=cfg.OPTIM.BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.WEIGHT_DECAY,
dampening=cfg.OPTIM.DAMPENING,
nesterov=cfg.OPTIM.NESTEROV,
)
def lr_fun_steps(cur_epoch):
"""Steps schedule (cfg.OPTIM.LR_POLICY = 'steps')."""
ind = [i for i, s in enumerate(cfg.OPTIM.STEPS) if cur_epoch >= s][-1]
return cfg.OPTIM.BASE_LR * (cfg.OPTIM.LR_MULT ** ind)
def lr_fun_exp(cur_epoch):
"""Exponential schedule (cfg.OPTIM.LR_POLICY = 'exp')."""
return cfg.OPTIM.BASE_LR * (cfg.OPTIM.GAMMA ** cur_epoch)
def lr_fun_cos(cur_epoch):
"""Cosine schedule (cfg.OPTIM.LR_POLICY = 'cos')."""
base_lr, max_epoch = cfg.OPTIM.BASE_LR, cfg.OPTIM.MAX_EPOCH
return 0.5 * base_lr * (1.0 + np.cos(np.pi * cur_epoch / max_epoch))
def get_lr_fun():
"""Retrieves the specified lr policy function"""
lr_fun = "lr_fun_" + cfg.OPTIM.LR_POLICY
if lr_fun not in globals():
        raise NotImplementedError("Unknown LR policy: " + cfg.OPTIM.LR_POLICY)
return globals()[lr_fun]
def get_epoch_lr(cur_epoch):
"""Retrieves the lr for the given epoch according to the policy."""
lr = get_lr_fun()(cur_epoch)
# Linear warmup
if cur_epoch < cfg.OPTIM.WARMUP_EPOCHS:
alpha = cur_epoch / cfg.OPTIM.WARMUP_EPOCHS
warmup_factor = cfg.OPTIM.WARMUP_FACTOR * (1.0 - alpha) + alpha
lr *= warmup_factor
return lr
def set_lr(optimizer, new_lr):
"""Sets the optimizer lr to the specified value."""
for param_group in optimizer.param_groups:
param_group["lr"] = new_lr

View File

@@ -0,0 +1,132 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Plotting functions."""
import colorlover as cl
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.offline as offline
import pycls.core.logging as logging
def get_plot_colors(max_colors, color_format="pyplot"):
"""Generate colors for plotting."""
colors = cl.scales["11"]["qual"]["Paired"]
if max_colors > len(colors):
colors = cl.to_rgb(cl.interp(colors, max_colors))
if color_format == "pyplot":
return [[j / 255.0 for j in c] for c in cl.to_numeric(colors)]
return colors
def prepare_plot_data(log_files, names, metric="top1_err"):
"""Load logs and extract data for plotting error curves."""
plot_data = []
for file, name in zip(log_files, names):
d, data = {}, logging.sort_log_data(logging.load_log_data(file))
for phase in ["train", "test"]:
x = data[phase + "_epoch"]["epoch_ind"]
y = data[phase + "_epoch"][metric]
d["x_" + phase], d["y_" + phase] = x, y
d[phase + "_label"] = "[{:5.2f}] ".format(min(y) if y else 0) + name
plot_data.append(d)
assert len(plot_data) > 0, "No data to plot"
return plot_data
def plot_error_curves_plotly(log_files, names, filename, metric="top1_err"):
"""Plot error curves using plotly and save to file."""
plot_data = prepare_plot_data(log_files, names, metric)
colors = get_plot_colors(len(plot_data), "plotly")
# Prepare data for plots (3 sets, train duplicated w and w/o legend)
data = []
for i, d in enumerate(plot_data):
s = str(i)
line_train = {"color": colors[i], "dash": "dashdot", "width": 1.5}
line_test = {"color": colors[i], "dash": "solid", "width": 1.5}
data.append(
go.Scatter(
x=d["x_train"],
y=d["y_train"],
mode="lines",
name=d["train_label"],
line=line_train,
legendgroup=s,
visible=True,
showlegend=False,
)
)
data.append(
go.Scatter(
x=d["x_test"],
y=d["y_test"],
mode="lines",
name=d["test_label"],
line=line_test,
legendgroup=s,
visible=True,
showlegend=True,
)
)
data.append(
go.Scatter(
x=d["x_train"],
y=d["y_train"],
mode="lines",
name=d["train_label"],
line=line_train,
legendgroup=s,
visible=False,
showlegend=True,
)
)
# Prepare layout w ability to toggle 'all', 'train', 'test'
titlefont = {"size": 18, "color": "#7f7f7f"}
vis = [[True, True, False], [False, False, True], [False, True, False]]
buttons = zip(["all", "train", "test"], [[{"visible": v}] for v in vis])
buttons = [{"label": b, "args": v, "method": "update"} for b, v in buttons]
layout = go.Layout(
title=metric + " vs. epoch<br>[dash=train, solid=test]",
xaxis={"title": "epoch", "titlefont": titlefont},
yaxis={"title": metric, "titlefont": titlefont},
showlegend=True,
hoverlabel={"namelength": -1},
updatemenus=[
{
"buttons": buttons,
"direction": "down",
"showactive": True,
"x": 1.02,
"xanchor": "left",
"y": 1.08,
"yanchor": "top",
}
],
)
# Create plotly plot
offline.plot({"data": data, "layout": layout}, filename=filename)
def plot_error_curves_pyplot(log_files, names, filename=None, metric="top1_err"):
"""Plot error curves using matplotlib.pyplot and save to file."""
plot_data = prepare_plot_data(log_files, names, metric)
colors = get_plot_colors(len(names))
for ind, d in enumerate(plot_data):
c, lbl = colors[ind], d["test_label"]
plt.plot(d["x_train"], d["y_train"], "--", c=c, alpha=0.8)
plt.plot(d["x_test"], d["y_test"], "-", c=c, alpha=0.8, label=lbl)
plt.title(metric + " vs. epoch\n[dash=train, solid=test]", fontsize=14)
plt.xlabel("epoch", fontsize=14)
plt.ylabel(metric, fontsize=14)
plt.grid(alpha=0.4)
plt.legend()
if filename:
plt.savefig(filename)
plt.clf()
else:
plt.show()
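A hedged usage sketch for the pyplot variant: it assumes an output directory containing one subdirectory per run, each holding the standard log file (the path and name filter below are made up).
import pycls.core.logging as logging
files, names = logging.get_log_files("/path/to/out_dir", name_filter="anynet")  # hypothetical dir
if files:
    plot_error_curves_pyplot(files, names, filename="curves.png", metric="top1_err")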

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Timer."""
import time
class Timer(object):
"""A simple timer (adapted from Detectron)."""
def __init__(self):
self.total_time = None
self.calls = None
self.start_time = None
self.diff = None
self.average_time = None
self.reset()
def tic(self):
# using time.time as time.clock does not normalize for multithreading
self.start_time = time.time()
def toc(self):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
def reset(self):
self.total_time = 0.0
self.calls = 0
self.start_time = 0.0
self.diff = 0.0
self.average_time = 0.0
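A minimal usage sketch of Timer; the sleep stands in for one training iteration.
import time
timer = Timer()
for _ in range(3):
    timer.tic()
    time.sleep(0.01)  # placeholder for real work
    timer.toc()
print(timer.average_time)  # average seconds per tic/toc pair over the 3 iterations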

View File

@@ -0,0 +1,419 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Tools for training and testing a model."""
import os
from thop import profile
import numpy as np
import pycls.core.benchmark as benchmark
import pycls.core.builders as builders
import pycls.core.checkpoint as checkpoint
import pycls.core.config as config
import pycls.core.distributed as dist
import pycls.core.logging as logging
import pycls.core.meters as meters
import pycls.core.net as net
import pycls.core.optimizer as optim
import pycls.datasets.loader as loader
import torch
import torch.nn.functional as F
from pycls.core.config import cfg
logger = logging.get_logger(__name__)
def setup_env():
"""Sets up environment for training or testing."""
if dist.is_master_proc():
# Ensure that the output dir exists
os.makedirs(cfg.OUT_DIR, exist_ok=True)
# Save the config
config.dump_cfg()
# Setup logging
logging.setup_logging()
# Log the config as both human readable and as a json
logger.info("Config:\n{}".format(cfg))
logger.info(logging.dump_log_data(cfg, "cfg"))
# Fix the RNG seeds (see RNG comment in core/config.py for discussion)
np.random.seed(cfg.RNG_SEED)
torch.manual_seed(cfg.RNG_SEED)
# Configure the CUDNN backend
torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK
def setup_model():
"""Sets up a model for training or testing and log the results."""
# Build the model
model = builders.build_model()
logger.info("Model:\n{}".format(model))
# Log model complexity
# logger.info(logging.dump_log_data(net.complexity(model), "complexity"))
if cfg.TASK == "seg" and cfg.TRAIN.DATASET == "cityscapes":
h, w = 1025, 2049
else:
h, w = cfg.TRAIN.IM_SIZE, cfg.TRAIN.IM_SIZE
if cfg.TASK == "jig":
x = torch.randn(1, cfg.JIGSAW_GRID ** 2, cfg.MODEL.INPUT_CHANNELS, h, w)
else:
x = torch.randn(1, cfg.MODEL.INPUT_CHANNELS, h, w)
macs, params = profile(model, inputs=(x, ), verbose=False)
logger.info("Params: {:,}".format(params))
logger.info("Flops: {:,}".format(macs))
# Transfer the model to the current GPU device
err_str = "Cannot use more GPU devices than available"
assert cfg.NUM_GPUS <= torch.cuda.device_count(), err_str
cur_device = torch.cuda.current_device()
model = model.cuda(device=cur_device)
# Use multi-process data parallel model in the multi-gpu setting
if cfg.NUM_GPUS > 1:
# Make model replica operate on the current device
model = torch.nn.parallel.DistributedDataParallel(
module=model, device_ids=[cur_device], output_device=cur_device
)
# Set complexity function to be module's complexity function
# model.complexity = model.module.complexity
return model
def train_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch):
"""Performs one epoch of training."""
# Update drop path prob for NAS
if cfg.MODEL.TYPE == "nas":
m = model.module if cfg.NUM_GPUS > 1 else model
m.set_drop_path_prob(cfg.NAS.DROP_PROB * cur_epoch / cfg.OPTIM.MAX_EPOCH)
# Shuffle the data
loader.shuffle(train_loader, cur_epoch)
# Update the learning rate per epoch
if not cfg.OPTIM.ITER_LR:
lr = optim.get_epoch_lr(cur_epoch)
optim.set_lr(optimizer, lr)
# Enable training mode
model.train()
train_meter.iter_tic()
for cur_iter, (inputs, labels) in enumerate(train_loader):
# Update the learning rate per iter
if cfg.OPTIM.ITER_LR:
lr = optim.get_epoch_lr(cur_epoch + cur_iter / len(train_loader))
optim.set_lr(optimizer, lr)
# Transfer the data to the current GPU device
inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
# Perform the forward pass
preds = model(inputs)
# Compute the loss
if isinstance(preds, tuple):
loss = loss_fun(preds[0], labels) + cfg.NAS.AUX_WEIGHT * loss_fun(preds[1], labels)
preds = preds[0]
else:
loss = loss_fun(preds, labels)
# Perform the backward pass
optimizer.zero_grad()
loss.backward()
# Update the parameters
optimizer.step()
# Compute the errors
if cfg.TASK == "col":
preds = preds.permute(0, 2, 3, 1)
preds = preds.reshape(-1, preds.size(3))
labels = labels.reshape(-1)
mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
else:
mb_size = inputs.size(0) * cfg.NUM_GPUS
if cfg.TASK == "seg":
# top1_err is in fact inter; top5_err is in fact union
top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
else:
ks = [1, min(5, cfg.MODEL.NUM_CLASSES)] # rot only has 4 classes
top1_err, top5_err = meters.topk_errors(preds, labels, ks)
# Combine the stats across the GPUs (no reduction if 1 GPU used)
loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err])
# Copy the stats from GPU to CPU (sync point)
loss = loss.item()
if cfg.TASK == "seg":
top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
else:
top1_err, top5_err = top1_err.item(), top5_err.item()
train_meter.iter_toc()
# Update and log stats
train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
train_meter.log_iter_stats(cur_epoch, cur_iter)
train_meter.iter_tic()
# Log epoch stats
train_meter.log_epoch_stats(cur_epoch)
train_meter.reset()
def search_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch):
"""Performs one epoch of differentiable architecture search."""
m = model.module if cfg.NUM_GPUS > 1 else model
# Shuffle the data
loader.shuffle(train_loader[0], cur_epoch)
loader.shuffle(train_loader[1], cur_epoch)
# Update the learning rate per epoch
if not cfg.OPTIM.ITER_LR:
lr = optim.get_epoch_lr(cur_epoch)
optim.set_lr(optimizer[0], lr)
# Enable training mode
model.train()
train_meter.iter_tic()
trainB_iter = iter(train_loader[1])
for cur_iter, (inputs, labels) in enumerate(train_loader[0]):
# Update the learning rate per iter
if cfg.OPTIM.ITER_LR:
lr = optim.get_epoch_lr(cur_epoch + cur_iter / len(train_loader[0]))
optim.set_lr(optimizer[0], lr)
# Transfer the data to the current GPU device
inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
# Update architecture
if cur_epoch + cur_iter / len(train_loader[0]) >= cfg.OPTIM.ARCH_EPOCH:
try:
inputsB, labelsB = next(trainB_iter)
except StopIteration:
trainB_iter = iter(train_loader[1])
inputsB, labelsB = next(trainB_iter)
inputsB, labelsB = inputsB.cuda(), labelsB.cuda(non_blocking=True)
optimizer[1].zero_grad()
loss = m._loss(inputsB, labelsB)
loss.backward()
optimizer[1].step()
# Perform the forward pass
preds = model(inputs)
# Compute the loss
loss = loss_fun(preds, labels)
# Perform the backward pass
optimizer[0].zero_grad()
loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
# Update the parameters
optimizer[0].step()
# Compute the errors
if cfg.TASK == "col":
preds = preds.permute(0, 2, 3, 1)
preds = preds.reshape(-1, preds.size(3))
labels = labels.reshape(-1)
mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
else:
mb_size = inputs.size(0) * cfg.NUM_GPUS
if cfg.TASK == "seg":
# top1_err is in fact inter; top5_err is in fact union
top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
else:
ks = [1, min(5, cfg.MODEL.NUM_CLASSES)] # rot only has 4 classes
top1_err, top5_err = meters.topk_errors(preds, labels, ks)
# Combine the stats across the GPUs (no reduction if 1 GPU used)
loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err])
# Copy the stats from GPU to CPU (sync point)
loss = loss.item()
if cfg.TASK == "seg":
top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
else:
top1_err, top5_err = top1_err.item(), top5_err.item()
train_meter.iter_toc()
# Update and log stats
train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
train_meter.log_iter_stats(cur_epoch, cur_iter)
train_meter.iter_tic()
# Log epoch stats
train_meter.log_epoch_stats(cur_epoch)
train_meter.reset()
# Log genotype
genotype = m.genotype()
logger.info("genotype = %s", genotype)
logger.info(F.softmax(m.net_.alphas_normal, dim=-1))
logger.info(F.softmax(m.net_.alphas_reduce, dim=-1))
@torch.no_grad()
def test_epoch(test_loader, model, test_meter, cur_epoch):
"""Evaluates the model on the test set."""
# Enable eval mode
model.eval()
test_meter.iter_tic()
for cur_iter, (inputs, labels) in enumerate(test_loader):
# Transfer the data to the current GPU device
inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
# Compute the predictions
preds = model(inputs)
# Compute the errors
if cfg.TASK == "col":
preds = preds.permute(0, 2, 3, 1)
preds = preds.reshape(-1, preds.size(3))
labels = labels.reshape(-1)
mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
else:
mb_size = inputs.size(0) * cfg.NUM_GPUS
if cfg.TASK == "seg":
# top1_err is in fact inter; top5_err is in fact union
top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
else:
ks = [1, min(5, cfg.MODEL.NUM_CLASSES)] # rot only has 4 classes
top1_err, top5_err = meters.topk_errors(preds, labels, ks)
# Combine the errors across the GPUs (no reduction if 1 GPU used)
top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
# Copy the errors from GPU to CPU (sync point)
if cfg.TASK == "seg":
top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
else:
top1_err, top5_err = top1_err.item(), top5_err.item()
test_meter.iter_toc()
# Update and log stats
test_meter.update_stats(top1_err, top5_err, mb_size)
test_meter.log_iter_stats(cur_epoch, cur_iter)
test_meter.iter_tic()
# Log epoch stats
test_meter.log_epoch_stats(cur_epoch)
test_meter.reset()
def train_model():
"""Trains the model."""
# Setup training/testing environment
setup_env()
# Construct the model, loss_fun, and optimizer
model = setup_model()
loss_fun = builders.build_loss_fun().cuda()
if "search" in cfg.MODEL.TYPE:
params_w = [v for k, v in model.named_parameters() if "alphas" not in k]
params_a = [v for k, v in model.named_parameters() if "alphas" in k]
optimizer_w = torch.optim.SGD(
params=params_w,
lr=cfg.OPTIM.BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.WEIGHT_DECAY,
dampening=cfg.OPTIM.DAMPENING,
nesterov=cfg.OPTIM.NESTEROV
)
if cfg.OPTIM.ARCH_OPTIM == "adam":
optimizer_a = torch.optim.Adam(
params=params_a,
lr=cfg.OPTIM.ARCH_BASE_LR,
betas=(0.5, 0.999),
weight_decay=cfg.OPTIM.ARCH_WEIGHT_DECAY
)
elif cfg.OPTIM.ARCH_OPTIM == "sgd":
optimizer_a = torch.optim.SGD(
params=params_a,
lr=cfg.OPTIM.ARCH_BASE_LR,
momentum=cfg.OPTIM.MOMENTUM,
weight_decay=cfg.OPTIM.ARCH_WEIGHT_DECAY,
dampening=cfg.OPTIM.DAMPENING,
nesterov=cfg.OPTIM.NESTEROV
)
optimizer = [optimizer_w, optimizer_a]
else:
optimizer = optim.construct_optimizer(model)
# Load checkpoint or initial weights
start_epoch = 0
if cfg.TRAIN.AUTO_RESUME and checkpoint.has_checkpoint():
last_checkpoint = checkpoint.get_last_checkpoint()
checkpoint_epoch = checkpoint.load_checkpoint(last_checkpoint, model, optimizer)
logger.info("Loaded checkpoint from: {}".format(last_checkpoint))
start_epoch = checkpoint_epoch + 1
elif cfg.TRAIN.WEIGHTS:
checkpoint.load_checkpoint(cfg.TRAIN.WEIGHTS, model)
logger.info("Loaded initial weights from: {}".format(cfg.TRAIN.WEIGHTS))
# Create data loaders and meters
if cfg.TRAIN.PORTION < 1:
if "search" in cfg.MODEL.TYPE:
train_loader = [loader._construct_loader(
dataset_name=cfg.TRAIN.DATASET,
split=cfg.TRAIN.SPLIT,
batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
shuffle=True,
drop_last=True,
portion=cfg.TRAIN.PORTION,
side="l"
),
loader._construct_loader(
dataset_name=cfg.TRAIN.DATASET,
split=cfg.TRAIN.SPLIT,
batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
shuffle=True,
drop_last=True,
portion=cfg.TRAIN.PORTION,
side="r"
)]
else:
train_loader = loader._construct_loader(
dataset_name=cfg.TRAIN.DATASET,
split=cfg.TRAIN.SPLIT,
batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
shuffle=True,
drop_last=True,
portion=cfg.TRAIN.PORTION,
side="l"
)
test_loader = loader._construct_loader(
dataset_name=cfg.TRAIN.DATASET,
split=cfg.TRAIN.SPLIT,
batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
shuffle=False,
drop_last=False,
portion=cfg.TRAIN.PORTION,
side="r"
)
else:
train_loader = loader.construct_train_loader()
test_loader = loader.construct_test_loader()
train_meter_type = meters.TrainMeterIoU if cfg.TASK == "seg" else meters.TrainMeter
test_meter_type = meters.TestMeterIoU if cfg.TASK == "seg" else meters.TestMeter
l = train_loader[0] if isinstance(train_loader, list) else train_loader
train_meter = train_meter_type(len(l))
test_meter = test_meter_type(len(test_loader))
# Compute model and loader timings
if start_epoch == 0 and cfg.PREC_TIME.NUM_ITER > 0:
l = train_loader[0] if isinstance(train_loader, list) else train_loader
benchmark.compute_time_full(model, loss_fun, l, test_loader)
# Perform the training loop
logger.info("Start epoch: {}".format(start_epoch + 1))
for cur_epoch in range(start_epoch, cfg.OPTIM.MAX_EPOCH):
# Train for one epoch
f = search_epoch if "search" in cfg.MODEL.TYPE else train_epoch
f(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch)
# Compute precise BN stats
if cfg.BN.USE_PRECISE_STATS:
net.compute_precise_bn_stats(model, train_loader)
# Save a checkpoint
if (cur_epoch + 1) % cfg.TRAIN.CHECKPOINT_PERIOD == 0:
checkpoint_file = checkpoint.save_checkpoint(model, optimizer, cur_epoch)
logger.info("Wrote checkpoint to: {}".format(checkpoint_file))
# Evaluate the model
next_epoch = cur_epoch + 1
if next_epoch % cfg.TRAIN.EVAL_PERIOD == 0 or next_epoch == cfg.OPTIM.MAX_EPOCH:
test_epoch(test_loader, model, test_meter, cur_epoch)
def test_model():
"""Evaluates a trained model."""
# Setup training/testing environment
setup_env()
# Construct the model
model = setup_model()
# Load model weights
checkpoint.load_checkpoint(cfg.TEST.WEIGHTS, model)
logger.info("Loaded model weights from: {}".format(cfg.TEST.WEIGHTS))
# Create data loaders and meters
test_loader = loader.construct_test_loader()
test_meter = meters.TestMeter(len(test_loader))
# Evaluate the model
test_epoch(test_loader, model, test_meter, 0)
def time_model():
"""Times model and data loader."""
# Setup training/testing environment
setup_env()
# Construct the model and loss_fun
model = setup_model()
loss_fun = builders.build_loss_fun().cuda()
# Create data loaders
train_loader = loader.construct_train_loader()
test_loader = loader.construct_test_loader()
# Compute model and loader timings
benchmark.compute_time_full(model, loss_fun, train_loader, test_loader)
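A hedged sketch of the fractional-epoch learning rate used in the epoch functions above when cfg.OPTIM.ITER_LR is enabled: the epoch index handed to the schedule is made fractional, so the policy interpolates within an epoch (the indices below are arbitrary).
import pycls.core.optimizer as optim
cur_epoch, cur_iter, iters_per_epoch = 3, 25, 100  # illustrative indices
lr = optim.get_epoch_lr(cur_epoch + cur_iter / iters_per_epoch)  # schedule queried at epoch 3.25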

View File

@@ -0,0 +1,406 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""AnyNet models."""
import pycls.core.net as net
import torch.nn as nn
from pycls.core.config import cfg
def get_stem_fun(stem_type):
"""Retrieves the stem function by name."""
stem_funs = {
"res_stem_cifar": ResStemCifar,
"res_stem_in": ResStemIN,
"simple_stem_in": SimpleStemIN,
}
err_str = "Stem type '{}' not supported"
assert stem_type in stem_funs.keys(), err_str.format(stem_type)
return stem_funs[stem_type]
def get_block_fun(block_type):
"""Retrieves the block function by name."""
block_funs = {
"vanilla_block": VanillaBlock,
"res_basic_block": ResBasicBlock,
"res_bottleneck_block": ResBottleneckBlock,
}
err_str = "Block type '{}' not supported"
assert block_type in block_funs.keys(), err_str.format(block_type)
return block_funs[block_type]
class AnyHead(nn.Module):
"""AnyNet head: AvgPool, 1x1."""
def __init__(self, w_in, nc):
super(AnyHead, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(w_in, nc, bias=True)
def forward(self, x):
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
@staticmethod
def complexity(cx, w_in, nc):
cx["h"], cx["w"] = 1, 1
cx = net.complexity_conv2d(cx, w_in, nc, 1, 1, 0, bias=True)
return cx
class VanillaBlock(nn.Module):
"""Vanilla block: [3x3 conv, BN, Relu] x2."""
def __init__(self, w_in, w_out, stride, bm=None, gw=None, se_r=None):
err_str = "Vanilla block does not support bm, gw, and se_r options"
assert bm is None and gw is None and se_r is None, err_str
super(VanillaBlock, self).__init__()
self.a = nn.Conv2d(w_in, w_out, 3, stride=stride, padding=1, bias=False)
self.a_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.a_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.b = nn.Conv2d(w_out, w_out, 3, stride=1, padding=1, bias=False)
self.b_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.b_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, bm=None, gw=None, se_r=None):
err_str = "Vanilla block does not support bm, gw, and se_r options"
assert bm is None and gw is None and se_r is None, err_str
cx = net.complexity_conv2d(cx, w_in, w_out, 3, stride, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
cx = net.complexity_conv2d(cx, w_out, w_out, 3, 1, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class BasicTransform(nn.Module):
"""Basic transformation: [3x3 conv, BN, Relu] x2."""
def __init__(self, w_in, w_out, stride):
super(BasicTransform, self).__init__()
self.a = nn.Conv2d(w_in, w_out, 3, stride=stride, padding=1, bias=False)
self.a_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.a_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.b = nn.Conv2d(w_out, w_out, 3, stride=1, padding=1, bias=False)
self.b_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.b_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride):
cx = net.complexity_conv2d(cx, w_in, w_out, 3, stride, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
cx = net.complexity_conv2d(cx, w_out, w_out, 3, 1, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class ResBasicBlock(nn.Module):
"""Residual basic block: x + F(x), F = basic transform."""
def __init__(self, w_in, w_out, stride, bm=None, gw=None, se_r=None):
err_str = "Basic transform does not support bm, gw, and se_r options"
assert bm is None and gw is None and se_r is None, err_str
super(ResBasicBlock, self).__init__()
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self.proj = nn.Conv2d(w_in, w_out, 1, stride=stride, padding=0, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.f = BasicTransform(w_in, w_out, stride)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, bm=None, gw=None, se_r=None):
err_str = "Basic transform does not support bm, gw, and se_r options"
assert bm is None and gw is None and se_r is None, err_str
proj_block = (w_in != w_out) or (stride != 1)
if proj_block:
h, w = cx["h"], cx["w"]
cx = net.complexity_conv2d(cx, w_in, w_out, 1, stride, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
cx["h"], cx["w"] = h, w # parallel branch
cx = BasicTransform.complexity(cx, w_in, w_out, stride)
return cx
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block: AvgPool, FC, ReLU, FC, Sigmoid."""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, 1, bias=True),
nn.ReLU(inplace=cfg.MEM.RELU_INPLACE),
nn.Conv2d(w_se, w_in, 1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
@staticmethod
def complexity(cx, w_in, w_se):
h, w = cx["h"], cx["w"]
cx["h"], cx["w"] = 1, 1
cx = net.complexity_conv2d(cx, w_in, w_se, 1, 1, 0, bias=True)
cx = net.complexity_conv2d(cx, w_se, w_in, 1, 1, 0, bias=True)
cx["h"], cx["w"] = h, w
return cx
class BottleneckTransform(nn.Module):
"""Bottleneck transformation: 1x1, 3x3 [+SE], 1x1."""
def __init__(self, w_in, w_out, stride, bm, gw, se_r):
super(BottleneckTransform, self).__init__()
w_b = int(round(w_out * bm))
g = w_b // gw
self.a = nn.Conv2d(w_in, w_b, 1, stride=1, padding=0, bias=False)
self.a_bn = nn.BatchNorm2d(w_b, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.a_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.b = nn.Conv2d(w_b, w_b, 3, stride=stride, padding=1, groups=g, bias=False)
self.b_bn = nn.BatchNorm2d(w_b, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.b_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
if se_r:
w_se = int(round(w_in * se_r))
self.se = SE(w_b, w_se)
self.c = nn.Conv2d(w_b, w_out, 1, stride=1, padding=0, bias=False)
self.c_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.c_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, bm, gw, se_r):
w_b = int(round(w_out * bm))
g = w_b // gw
cx = net.complexity_conv2d(cx, w_in, w_b, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_b)
cx = net.complexity_conv2d(cx, w_b, w_b, 3, stride, 1, g)
cx = net.complexity_batchnorm2d(cx, w_b)
if se_r:
w_se = int(round(w_in * se_r))
cx = SE.complexity(cx, w_b, w_se)
cx = net.complexity_conv2d(cx, w_b, w_out, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class ResBottleneckBlock(nn.Module):
"""Residual bottleneck block: x + F(x), F = bottleneck transform."""
def __init__(self, w_in, w_out, stride, bm=1.0, gw=1, se_r=None):
super(ResBottleneckBlock, self).__init__()
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self.proj = nn.Conv2d(w_in, w_out, 1, stride=stride, padding=0, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.f = BottleneckTransform(w_in, w_out, stride, bm, gw, se_r)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, bm=1.0, gw=1, se_r=None):
proj_block = (w_in != w_out) or (stride != 1)
if proj_block:
h, w = cx["h"], cx["w"]
cx = net.complexity_conv2d(cx, w_in, w_out, 1, stride, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
cx["h"], cx["w"] = h, w # parallel branch
cx = BottleneckTransform.complexity(cx, w_in, w_out, stride, bm, gw, se_r)
return cx
class ResStemCifar(nn.Module):
"""ResNet stem for CIFAR: 3x3, BN, ReLU."""
def __init__(self, w_in, w_out):
super(ResStemCifar, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=1, padding=1, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 3, 1, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class ResStemIN(nn.Module):
"""ResNet stem for ImageNet: 7x7, BN, ReLU, MaxPool."""
def __init__(self, w_in, w_out):
super(ResStemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 7, stride=2, padding=3, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
self.pool = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 7, 2, 3)
cx = net.complexity_batchnorm2d(cx, w_out)
cx = net.complexity_maxpool2d(cx, 3, 2, 1)
return cx
class SimpleStemIN(nn.Module):
"""Simple stem for ImageNet: 3x3, BN, ReLU."""
def __init__(self, w_in, w_out):
super(SimpleStemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=2, padding=1, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 3, 2, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class AnyStage(nn.Module):
"""AnyNet stage (sequence of blocks w/ the same output shape)."""
def __init__(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
super(AnyStage, self).__init__()
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
name = "b{}".format(i + 1)
self.add_module(name, block_fun(b_w_in, w_out, b_stride, bm, gw, se_r))
def forward(self, x):
for block in self.children():
x = block(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
cx = block_fun.complexity(cx, b_w_in, w_out, b_stride, bm, gw, se_r)
return cx
class AnyNet(nn.Module):
"""AnyNet model."""
@staticmethod
def get_args():
return {
"stem_type": cfg.ANYNET.STEM_TYPE,
"stem_w": cfg.ANYNET.STEM_W,
"block_type": cfg.ANYNET.BLOCK_TYPE,
"ds": cfg.ANYNET.DEPTHS,
"ws": cfg.ANYNET.WIDTHS,
"ss": cfg.ANYNET.STRIDES,
"bms": cfg.ANYNET.BOT_MULS,
"gws": cfg.ANYNET.GROUP_WS,
"se_r": cfg.ANYNET.SE_R if cfg.ANYNET.SE_ON else None,
"nc": cfg.MODEL.NUM_CLASSES,
}
def __init__(self, **kwargs):
super(AnyNet, self).__init__()
kwargs = self.get_args() if not kwargs else kwargs
#print(kwargs)
self._construct(**kwargs)
self.apply(net.init_weights)
def _construct(self, stem_type, stem_w, block_type, ds, ws, ss, bms, gws, se_r, nc):
# Generate dummy bot muls and gs for models that do not use them
bms = bms if bms else [None for _d in ds]
gws = gws if gws else [None for _d in ds]
stage_params = list(zip(ds, ws, ss, bms, gws))
stem_fun = get_stem_fun(stem_type)
self.stem = stem_fun(3, stem_w)
block_fun = get_block_fun(block_type)
prev_w = stem_w
for i, (d, w, s, bm, gw) in enumerate(stage_params):
name = "s{}".format(i + 1)
self.add_module(name, AnyStage(prev_w, w, s, d, block_fun, bm, gw, se_r))
prev_w = w
self.head = AnyHead(w_in=prev_w, nc=nc)
def forward(self, x, get_ints=False):
for module in self.children():
x = module(x)
return x
@staticmethod
def complexity(cx, **kwargs):
"""Computes model complexity. If you alter the model, make sure to update."""
kwargs = AnyNet.get_args() if not kwargs else kwargs
return AnyNet._complexity(cx, **kwargs)
@staticmethod
def _complexity(cx, stem_type, stem_w, block_type, ds, ws, ss, bms, gws, se_r, nc):
bms = bms if bms else [None for _d in ds]
gws = gws if gws else [None for _d in ds]
stage_params = list(zip(ds, ws, ss, bms, gws))
stem_fun = get_stem_fun(stem_type)
cx = stem_fun.complexity(cx, 3, stem_w)
block_fun = get_block_fun(block_type)
prev_w = stem_w
for d, w, s, bm, gw in stage_params:
cx = AnyStage.complexity(cx, prev_w, w, s, d, block_fun, bm, gw, se_r)
prev_w = w
cx = AnyHead.complexity(cx, prev_w, nc)
return cx
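A minimal construction sketch for AnyNet using explicit keyword arguments; the depths and widths below are illustrative rather than taken from any provided config, and BN/ReLU settings fall back to the cfg defaults.
import torch
tiny = AnyNet(
    stem_type="res_stem_cifar", stem_w=16,
    block_type="res_basic_block",
    ds=[2, 2, 2], ws=[16, 32, 64], ss=[1, 2, 2],  # made-up toy schedule
    bms=None, gws=None, se_r=None, nc=10,
)
out = tiny(torch.randn(2, 3, 32, 32))
print(out.shape)  # torch.Size([2, 10])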

View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
from pycls.core.config import cfg
def Preprocess(x):
if cfg.TASK == 'jig':
assert len(x.shape) == 5, 'Wrong tensor dimension for jigsaw'
assert x.shape[1] == cfg.JIGSAW_GRID ** 2, 'Wrong grid for jigsaw'
x = x.view([x.shape[0] * x.shape[1], x.shape[2], x.shape[3], x.shape[4]])
return x
class Classifier(nn.Module):
def __init__(self, channels, num_classes):
super(Classifier, self).__init__()
if cfg.TASK == 'jig':
self.jig_sq = cfg.JIGSAW_GRID ** 2
self.pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(channels * self.jig_sq, num_classes)
elif cfg.TASK == 'col':
self.classifier = nn.Conv2d(channels, num_classes, kernel_size=1, stride=1)
elif cfg.TASK == 'seg':
self.classifier = ASPP(channels, cfg.MODEL.ASPP_CHANNELS, num_classes, cfg.MODEL.ASPP_RATES)
else:
self.pooling = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(channels, num_classes)
def forward(self, x, shape):
if cfg.TASK == 'jig':
x = self.pooling(x)
x = x.view([x.shape[0] // self.jig_sq, x.shape[1] * self.jig_sq, x.shape[2], x.shape[3]])
x = self.classifier(x.view(x.size(0), -1))
elif cfg.TASK in ['col', 'seg']:
x = self.classifier(x)
x = nn.Upsample(shape, mode='bilinear', align_corners=True)(x)
else:
x = self.pooling(x)
x = self.classifier(x.view(x.size(0), -1))
return x
class ASPP(nn.Module):
def __init__(self, in_channels, out_channels, num_classes, rates):
super(ASPP, self).__init__()
assert len(rates) in [1, 3]
self.rates = rates
self.global_pooling = nn.AdaptiveAvgPool2d(1)
self.aspp1 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
self.aspp2 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 3, dilation=rates[0],
padding=rates[0], bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
if len(self.rates) == 3:
self.aspp3 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 3, dilation=rates[1],
padding=rates[1], bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
self.aspp4 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 3, dilation=rates[2],
padding=rates[2], bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
self.aspp5 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
self.classifier = nn.Sequential(
nn.Conv2d(out_channels * (len(rates) + 2), out_channels, 1,
bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, num_classes, 1)
)
def forward(self, x):
x1 = self.aspp1(x)
x2 = self.aspp2(x)
x5 = self.global_pooling(x)
x5 = self.aspp5(x5)
x5 = nn.Upsample((x.shape[2], x.shape[3]), mode='bilinear',
align_corners=True)(x5)
if len(self.rates) == 3:
x3 = self.aspp3(x)
x4 = self.aspp4(x)
x = torch.cat((x1, x2, x3, x4, x5), 1)
else:
x = torch.cat((x1, x2, x5), 1)
x = self.classifier(x)
return x
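A hedged sketch of the plain-classification path through Classifier, assuming cfg.TASK is left at a value other than 'jig', 'col', or 'seg'; shape is unused on that branch.
import torch
head = Classifier(channels=64, num_classes=10)  # assumes the default (non-jig/col/seg) task
logits = head(torch.randn(2, 64, 8, 8), shape=None)
print(logits.shape)  # torch.Size([2, 10])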

View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""EfficientNet models."""
import pycls.core.net as net
import torch
import torch.nn as nn
from pycls.core.config import cfg
class EffHead(nn.Module):
"""EfficientNet head: 1x1, BN, Swish, AvgPool, Dropout, FC."""
def __init__(self, w_in, w_out, nc):
super(EffHead, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 1, stride=1, padding=0, bias=False)
self.conv_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.conv_swish = Swish()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
if cfg.EN.DROPOUT_RATIO > 0.0:
self.dropout = nn.Dropout(p=cfg.EN.DROPOUT_RATIO)
self.fc = nn.Linear(w_out, nc, bias=True)
def forward(self, x):
x = self.conv_swish(self.conv_bn(self.conv(x)))
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x) if hasattr(self, "dropout") else x
x = self.fc(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, nc):
cx = net.complexity_conv2d(cx, w_in, w_out, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
cx["h"], cx["w"] = 1, 1
cx = net.complexity_conv2d(cx, w_out, nc, 1, 1, 0, bias=True)
return cx
class Swish(nn.Module):
"""Swish activation function: x * sigmoid(x)."""
def __init__(self):
super(Swish, self).__init__()
def forward(self, x):
return x * torch.sigmoid(x)
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block w/ Swish: AvgPool, FC, Swish, FC, Sigmoid."""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, 1, bias=True),
Swish(),
nn.Conv2d(w_se, w_in, 1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
@staticmethod
def complexity(cx, w_in, w_se):
h, w = cx["h"], cx["w"]
cx["h"], cx["w"] = 1, 1
cx = net.complexity_conv2d(cx, w_in, w_se, 1, 1, 0, bias=True)
cx = net.complexity_conv2d(cx, w_se, w_in, 1, 1, 0, bias=True)
cx["h"], cx["w"] = h, w
return cx
class MBConv(nn.Module):
"""Mobile inverted bottleneck block w/ SE (MBConv)."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out):
# expansion, 3x3 dwise, BN, Swish, SE, 1x1, BN, skip_connection
super(MBConv, self).__init__()
self.exp = None
w_exp = int(w_in * exp_r)
if w_exp != w_in:
self.exp = nn.Conv2d(w_in, w_exp, 1, stride=1, padding=0, bias=False)
self.exp_bn = nn.BatchNorm2d(w_exp, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.exp_swish = Swish()
dwise_args = {"groups": w_exp, "padding": (kernel - 1) // 2, "bias": False}
self.dwise = nn.Conv2d(w_exp, w_exp, kernel, stride=stride, **dwise_args)
self.dwise_bn = nn.BatchNorm2d(w_exp, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.dwise_swish = Swish()
self.se = SE(w_exp, int(w_in * se_r))
self.lin_proj = nn.Conv2d(w_exp, w_out, 1, stride=1, padding=0, bias=False)
self.lin_proj_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
# Skip connection if in and out shapes are the same (MN-V2 style)
self.has_skip = stride == 1 and w_in == w_out
def forward(self, x):
f_x = x
if self.exp:
f_x = self.exp_swish(self.exp_bn(self.exp(f_x)))
f_x = self.dwise_swish(self.dwise_bn(self.dwise(f_x)))
f_x = self.se(f_x)
f_x = self.lin_proj_bn(self.lin_proj(f_x))
if self.has_skip:
if self.training and cfg.EN.DC_RATIO > 0.0:
f_x = net.drop_connect(f_x, cfg.EN.DC_RATIO)
f_x = x + f_x
return f_x
@staticmethod
def complexity(cx, w_in, exp_r, kernel, stride, se_r, w_out):
w_exp = int(w_in * exp_r)
if w_exp != w_in:
cx = net.complexity_conv2d(cx, w_in, w_exp, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_exp)
padding = (kernel - 1) // 2
cx = net.complexity_conv2d(cx, w_exp, w_exp, kernel, stride, padding, w_exp)
cx = net.complexity_batchnorm2d(cx, w_exp)
cx = SE.complexity(cx, w_exp, int(w_in * se_r))
cx = net.complexity_conv2d(cx, w_exp, w_out, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class EffStage(nn.Module):
"""EfficientNet stage."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out, d):
super(EffStage, self).__init__()
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
name = "b{}".format(i + 1)
self.add_module(name, MBConv(b_w_in, exp_r, kernel, b_stride, se_r, w_out))
def forward(self, x):
for block in self.children():
x = block(x)
return x
@staticmethod
def complexity(cx, w_in, exp_r, kernel, stride, se_r, w_out, d):
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
cx = MBConv.complexity(cx, b_w_in, exp_r, kernel, b_stride, se_r, w_out)
return cx
class StemIN(nn.Module):
"""EfficientNet stem for ImageNet: 3x3, BN, Swish."""
def __init__(self, w_in, w_out):
super(StemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=2, padding=1, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.swish = Swish()
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 3, 2, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class EffNet(nn.Module):
"""EfficientNet model."""
@staticmethod
def get_args():
return {
"stem_w": cfg.EN.STEM_W,
"ds": cfg.EN.DEPTHS,
"ws": cfg.EN.WIDTHS,
"exp_rs": cfg.EN.EXP_RATIOS,
"se_r": cfg.EN.SE_R,
"ss": cfg.EN.STRIDES,
"ks": cfg.EN.KERNELS,
"head_w": cfg.EN.HEAD_W,
"nc": cfg.MODEL.NUM_CLASSES,
}
def __init__(self):
err_str = "Dataset {} is not supported"
assert cfg.TRAIN.DATASET in ["imagenet"], err_str.format(cfg.TRAIN.DATASET)
assert cfg.TEST.DATASET in ["imagenet"], err_str.format(cfg.TEST.DATASET)
super(EffNet, self).__init__()
self._construct(**EffNet.get_args())
self.apply(net.init_weights)
def _construct(self, stem_w, ds, ws, exp_rs, se_r, ss, ks, head_w, nc):
stage_params = list(zip(ds, ws, exp_rs, ss, ks))
self.stem = StemIN(3, stem_w)
prev_w = stem_w
for i, (d, w, exp_r, stride, kernel) in enumerate(stage_params):
name = "s{}".format(i + 1)
self.add_module(name, EffStage(prev_w, exp_r, kernel, stride, se_r, w, d))
prev_w = w
self.head = EffHead(prev_w, head_w, nc)
def forward(self, x):
for module in self.children():
x = module(x)
return x
@staticmethod
def complexity(cx):
"""Computes model complexity. If you alter the model, make sure to update."""
return EffNet._complexity(cx, **EffNet.get_args())
@staticmethod
def _complexity(cx, stem_w, ds, ws, exp_rs, se_r, ss, ks, head_w, nc):
stage_params = list(zip(ds, ws, exp_rs, ss, ks))
cx = StemIN.complexity(cx, 3, stem_w)
prev_w = stem_w
for d, w, exp_r, stride, kernel in stage_params:
cx = EffStage.complexity(cx, prev_w, exp_r, kernel, stride, se_r, w, d)
prev_w = w
cx = EffHead.complexity(cx, prev_w, head_w, nc)
return cx
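A small standalone sketch of the MBConv width bookkeeping above, with assumed values for w_in, exp_r, se_r and stride (they are not read from any EN config in this repo): the expansion width comes from w_in * exp_r, the SE bottleneck from w_in * se_r, and the skip connection only exists when stride is 1 and the widths match.

w_in, exp_r, se_r, w_out, stride = 32, 6.0, 0.25, 32, 1  # assumed example values
w_exp = int(w_in * exp_r)    # channels after the 1x1 expansion (192 here)
w_se = int(w_in * se_r)      # SE width is derived from w_in, not from w_exp (8 here)
has_skip = stride == 1 and w_in == w_out
print(w_exp, w_se, has_skip)  # 192 8 True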

View File

@@ -0,0 +1,634 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""NAS genotypes (adopted from DARTS)."""
from collections import namedtuple
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
# NASNet ops
NASNET_OPS = [
'skip_connect',
'conv_3x1_1x3',
'conv_7x1_1x7',
'dil_conv_3x3',
'avg_pool_3x3',
'max_pool_3x3',
'max_pool_5x5',
'max_pool_7x7',
'conv_1x1',
'conv_3x3',
'sep_conv_3x3',
'sep_conv_5x5',
'sep_conv_7x7',
]
# ENAS ops
ENAS_OPS = [
'skip_connect',
'sep_conv_3x3',
'sep_conv_5x5',
'avg_pool_3x3',
'max_pool_3x3',
]
# AmoebaNet ops
AMOEBA_OPS = [
'skip_connect',
'sep_conv_3x3',
'sep_conv_5x5',
'sep_conv_7x7',
'avg_pool_3x3',
'max_pool_3x3',
'dil_sep_conv_3x3',
'conv_7x1_1x7',
]
# NAO ops
NAO_OPS = [
'skip_connect',
'conv_1x1',
'conv_3x3',
'conv_3x1_1x3',
'conv_7x1_1x7',
'max_pool_2x2',
'max_pool_3x3',
'max_pool_5x5',
'avg_pool_2x2',
'avg_pool_3x3',
'avg_pool_5x5',
]
# PNAS ops
PNAS_OPS = [
'sep_conv_3x3',
'sep_conv_5x5',
'sep_conv_7x7',
'conv_7x1_1x7',
'skip_connect',
'avg_pool_3x3',
'max_pool_3x3',
'dil_conv_3x3',
]
# DARTS ops
DARTS_OPS = [
'none',
'max_pool_3x3',
'avg_pool_3x3',
'skip_connect',
'sep_conv_3x3',
'sep_conv_5x5',
'dil_conv_3x3',
'dil_conv_5x5',
]
NASNet = Genotype(
normal=[
('sep_conv_5x5', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 0),
('sep_conv_3x3', 0),
('avg_pool_3x3', 1),
('skip_connect', 0),
('avg_pool_3x3', 0),
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('skip_connect', 1),
],
normal_concat=[2, 3, 4, 5, 6],
reduce=[
('sep_conv_5x5', 1),
('sep_conv_7x7', 0),
('max_pool_3x3', 1),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('sep_conv_5x5', 0),
('skip_connect', 3),
('avg_pool_3x3', 2),
('sep_conv_3x3', 2),
('max_pool_3x3', 1),
],
reduce_concat=[4, 5, 6],
)
PNASNet = Genotype(
normal=[
('sep_conv_5x5', 0),
('max_pool_3x3', 0),
('sep_conv_7x7', 1),
('max_pool_3x3', 1),
('sep_conv_5x5', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 4),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('skip_connect', 1),
],
normal_concat=[2, 3, 4, 5, 6],
reduce=[
('sep_conv_5x5', 0),
('max_pool_3x3', 0),
('sep_conv_7x7', 1),
('max_pool_3x3', 1),
('sep_conv_5x5', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 4),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('skip_connect', 1),
],
reduce_concat=[2, 3, 4, 5, 6],
)
AmoebaNet = Genotype(
normal=[
('avg_pool_3x3', 0),
('max_pool_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_5x5', 2),
('sep_conv_3x3', 0),
('avg_pool_3x3', 3),
('sep_conv_3x3', 1),
('skip_connect', 1),
('skip_connect', 0),
('avg_pool_3x3', 1),
],
normal_concat=[4, 5, 6],
reduce=[
('avg_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_7x7', 2),
('sep_conv_7x7', 0),
('avg_pool_3x3', 1),
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('conv_7x1_1x7', 0),
('sep_conv_3x3', 5),
],
reduce_concat=[3, 4, 6]
)
DARTS_V1 = Genotype(
normal=[
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('skip_connect', 0),
('sep_conv_3x3', 1),
('skip_connect', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('skip_connect', 2)
],
normal_concat=[2, 3, 4, 5],
reduce=[
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('skip_connect', 2),
('max_pool_3x3', 0),
('max_pool_3x3', 0),
('skip_connect', 2),
('skip_connect', 2),
('avg_pool_3x3', 0)
],
reduce_concat=[2, 3, 4, 5]
)
DARTS_V2 = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('skip_connect', 0),
('skip_connect', 0),
('dil_conv_3x3', 2)
],
normal_concat=[2, 3, 4, 5],
reduce=[
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('skip_connect', 2),
('max_pool_3x3', 1),
('max_pool_3x3', 0),
('skip_connect', 2),
('skip_connect', 2),
('max_pool_3x3', 1)
],
reduce_concat=[2, 3, 4, 5]
)
PDARTS = Genotype(
normal=[
('skip_connect', 0),
('dil_conv_3x3', 1),
('skip_connect', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 3),
('sep_conv_3x3', 0),
('dil_conv_5x5', 4)
],
normal_concat=range(2, 6),
reduce=[
('avg_pool_3x3', 0),
('sep_conv_5x5', 1),
('sep_conv_3x3', 0),
('dil_conv_5x5', 2),
('max_pool_3x3', 0),
('dil_conv_3x3', 1),
('dil_conv_3x3', 1),
('dil_conv_5x5', 3)
],
reduce_concat=range(2, 6)
)
PCDARTS_C10 = Genotype(
normal=[
('sep_conv_3x3', 1),
('skip_connect', 0),
('sep_conv_3x3', 0),
('dil_conv_3x3', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 1),
('avg_pool_3x3', 0),
('dil_conv_3x3', 1)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_5x5', 1),
('max_pool_3x3', 0),
('sep_conv_5x5', 1),
('sep_conv_5x5', 2),
('sep_conv_3x3', 0),
('sep_conv_3x3', 3),
('sep_conv_3x3', 1),
('sep_conv_3x3', 2)
],
reduce_concat=range(2, 6)
)
PCDARTS_IN1K = Genotype(
normal=[
('skip_connect', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 0),
('skip_connect', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 3),
('sep_conv_3x3', 1),
('dil_conv_5x5', 4)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_3x3', 0),
('skip_connect', 1),
('dil_conv_5x5', 2),
('max_pool_3x3', 1),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 3)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET_CLS = Genotype(
normal=[
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 2),
('sep_conv_5x5', 1),
('sep_conv_3x3', 0)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('skip_connect', 1),
('max_pool_3x3', 0),
('dil_conv_5x5', 2),
('max_pool_3x3', 0),
('sep_conv_3x3', 2),
('sep_conv_3x3', 4),
('dil_conv_5x5', 3)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET_ROT = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 2),
('sep_conv_3x3', 4),
('sep_conv_5x5', 2)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET_COL = Genotype(
normal=[
('skip_connect', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('skip_connect', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 3),
('sep_conv_3x3', 0),
('sep_conv_3x3', 2)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_3x3', 1),
('max_pool_3x3', 0),
('sep_conv_5x5', 3),
('max_pool_3x3', 0),
('sep_conv_3x3', 4)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET_JIG = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 3),
('sep_conv_3x3', 1),
('sep_conv_5x5', 0)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_5x5', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 1)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET22K_CLS = Genotype(
normal=[
('sep_conv_3x3', 1),
('skip_connect', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('max_pool_3x3', 1),
('dil_conv_5x5', 2),
('max_pool_3x3', 0),
('dil_conv_5x5', 3),
('dil_conv_5x5', 2),
('dil_conv_5x5', 4),
('dil_conv_5x5', 3)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET22K_ROT = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('sep_conv_5x5', 1),
('dil_conv_5x5', 2),
('sep_conv_5x5', 0),
('dil_conv_5x5', 3),
('sep_conv_3x3', 2),
('sep_conv_3x3', 4),
('sep_conv_3x3', 3)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET22K_COL = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_3x3', 3),
('sep_conv_3x3', 0)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('skip_connect', 1),
('dil_conv_5x5', 2),
('sep_conv_3x3', 0),
('sep_conv_3x3', 3),
('sep_conv_3x3', 0),
('sep_conv_3x3', 4),
('sep_conv_5x5', 1)
],
reduce_concat=range(2, 6)
)
UNNAS_IMAGENET22K_JIG = Genotype(
normal=[
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 4)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_5x5', 0),
('skip_connect', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 2),
('sep_conv_5x5', 0),
('sep_conv_5x5', 3),
('sep_conv_5x5', 0),
('sep_conv_5x5', 4)
],
reduce_concat=range(2, 6)
)
UNNAS_CITYSCAPES_SEG = Genotype(
normal=[
('skip_connect', 0),
('sep_conv_5x5', 1),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1)
],
normal_concat=range(2, 6),
reduce=[
('sep_conv_3x3', 0),
('avg_pool_3x3', 1),
('avg_pool_3x3', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 2),
('sep_conv_5x5', 0),
('sep_conv_3x3', 4),
('sep_conv_5x5', 2)
],
reduce_concat=range(2, 6)
)
UNNAS_CITYSCAPES_ROT = Genotype(
normal=[
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 2),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 3),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0)
],
normal_concat=range(2, 6),
reduce=[
('max_pool_3x3', 0),
('sep_conv_5x5', 1),
('sep_conv_5x5', 2),
('sep_conv_5x5', 1),
('sep_conv_5x5', 3),
('dil_conv_5x5', 2),
('sep_conv_5x5', 2),
('sep_conv_5x5', 0)
],
reduce_concat=range(2, 6)
)
UNNAS_CITYSCAPES_COL = Genotype(
normal=[
('dil_conv_3x3', 1),
('sep_conv_3x3', 0),
('skip_connect', 0),
('sep_conv_5x5', 2),
('dil_conv_3x3', 3),
('skip_connect', 0),
('skip_connect', 0),
('sep_conv_3x3', 1)
],
normal_concat=range(2, 6),
reduce=[
('avg_pool_3x3', 1),
('avg_pool_3x3', 0),
('avg_pool_3x3', 1),
('avg_pool_3x3', 0),
('avg_pool_3x3', 1),
('avg_pool_3x3', 0),
('avg_pool_3x3', 1),
('skip_connect', 4)
],
reduce_concat=range(2, 6)
)
UNNAS_CITYSCAPES_JIG = Genotype(
normal=[
('dil_conv_5x5', 1),
('sep_conv_5x5', 0),
('sep_conv_3x3', 0),
('sep_conv_3x3', 1),
('sep_conv_3x3', 0),
('sep_conv_3x3', 2),
('sep_conv_3x3', 0),
('dil_conv_5x5', 1)
],
normal_concat=range(2, 6),
reduce=[
('avg_pool_3x3', 0),
('skip_connect', 1),
('dil_conv_5x5', 1),
('dil_conv_5x5', 2),
('dil_conv_5x5', 2),
('dil_conv_5x5', 0),
('dil_conv_5x5', 3),
('dil_conv_5x5', 2)
],
reduce_concat=range(2, 6)
)
# Supported genotypes
GENOTYPES = {
'nas': NASNet,
'pnas': PNASNet,
'amoeba': AmoebaNet,
'darts_v1': DARTS_V1,
'darts_v2': DARTS_V2,
'pdarts': PDARTS,
'pcdarts_c10': PCDARTS_C10,
'pcdarts_in1k': PCDARTS_IN1K,
'unnas_imagenet_cls': UNNAS_IMAGENET_CLS,
'unnas_imagenet_rot': UNNAS_IMAGENET_ROT,
'unnas_imagenet_col': UNNAS_IMAGENET_COL,
'unnas_imagenet_jig': UNNAS_IMAGENET_JIG,
'unnas_imagenet22k_cls': UNNAS_IMAGENET22K_CLS,
'unnas_imagenet22k_rot': UNNAS_IMAGENET22K_ROT,
'unnas_imagenet22k_col': UNNAS_IMAGENET22K_COL,
'unnas_imagenet22k_jig': UNNAS_IMAGENET22K_JIG,
'unnas_cityscapes_seg': UNNAS_CITYSCAPES_SEG,
'unnas_cityscapes_rot': UNNAS_CITYSCAPES_ROT,
'unnas_cityscapes_col': UNNAS_CITYSCAPES_COL,
'unnas_cityscapes_jig': UNNAS_CITYSCAPES_JIG,
'custom': None,
}
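As a rough illustration of how these genotypes are consumed (see the Cell class in the next file), each intermediate node of a cell takes two (op, input-index) pairs from the list; the sketch below just unpacks DARTS_V2's normal cell and assumes the definitions above are in scope.

op_names, indices = zip(*DARTS_V2.normal)  # 8 edges -> 4 intermediate nodes, two inputs each
for i in range(len(op_names) // 2):
    print(f"node {i + 2}: {op_names[2*i]} from state {indices[2*i]}, "
          f"{op_names[2*i+1]} from state {indices[2*i+1]}")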

View File

@@ -0,0 +1,299 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""NAS network (adopted from DARTS)."""
from torch.autograd import Variable
import torch
import torch.nn as nn
import pycls.core.logging as logging
from pycls.core.config import cfg
from pycls.models.common import Preprocess
from pycls.models.common import Classifier
from pycls.models.nas.genotypes import GENOTYPES
from pycls.models.nas.genotypes import Genotype
from pycls.models.nas.operations import FactorizedReduce
from pycls.models.nas.operations import OPS
from pycls.models.nas.operations import ReLUConvBN
from pycls.models.nas.operations import Identity
logger = logging.get_logger(__name__)
def drop_path(x, drop_prob):
"""Drop path (ported from DARTS)."""
if drop_prob > 0.:
keep_prob = 1.-drop_prob
mask = Variable(
torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
)
x.div_(keep_prob)
x.mul_(mask)
return x
class Cell(nn.Module):
"""NAS cell (ported from DARTS)."""
def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
super(Cell, self).__init__()
logger.info('{}, {}, {}'.format(C_prev_prev, C_prev, C))
if reduction_prev:
self.preprocess0 = FactorizedReduce(C_prev_prev, C)
else:
self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
if reduction:
op_names, indices = zip(*genotype.reduce)
concat = genotype.reduce_concat
else:
op_names, indices = zip(*genotype.normal)
concat = genotype.normal_concat
self._compile(C, op_names, indices, concat, reduction)
def _compile(self, C, op_names, indices, concat, reduction):
assert len(op_names) == len(indices)
self._steps = len(op_names) // 2
self._concat = concat
self.multiplier = len(concat)
self._ops = nn.ModuleList()
for name, index in zip(op_names, indices):
stride = 2 if reduction and index < 2 else 1
op = OPS[name](C, stride, True)
self._ops += [op]
self._indices = indices
def forward(self, s0, s1, drop_prob):
s0 = self.preprocess0(s0)
s1 = self.preprocess1(s1)
states = [s0, s1]
for i in range(self._steps):
h1 = states[self._indices[2*i]]
h2 = states[self._indices[2*i+1]]
op1 = self._ops[2*i]
op2 = self._ops[2*i+1]
h1 = op1(h1)
h2 = op2(h2)
if self.training and drop_prob > 0.:
if not isinstance(op1, Identity):
h1 = drop_path(h1, drop_prob)
if not isinstance(op2, Identity):
h2 = drop_path(h2, drop_prob)
s = h1 + h2
states += [s]
return torch.cat([states[i] for i in self._concat], dim=1)
class AuxiliaryHeadCIFAR(nn.Module):
def __init__(self, C, num_classes):
"""assuming input size 8x8"""
super(AuxiliaryHeadCIFAR, self).__init__()
self.features = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
nn.Conv2d(C, 128, 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, 2, bias=False),
nn.BatchNorm2d(768),
nn.ReLU(inplace=True)
)
self.classifier = nn.Linear(768, num_classes)
def forward(self, x):
x = self.features(x)
x = self.classifier(x.view(x.size(0),-1))
return x
class AuxiliaryHeadImageNet(nn.Module):
def __init__(self, C, num_classes):
"""assuming input size 14x14"""
super(AuxiliaryHeadImageNet, self).__init__()
self.features = nn.Sequential(
nn.ReLU(inplace=True),
nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
nn.Conv2d(C, 128, 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Conv2d(128, 768, 2, bias=False),
# NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
# Commenting it out for consistency with the experiments in the paper.
# nn.BatchNorm2d(768),
nn.ReLU(inplace=True)
)
self.classifier = nn.Linear(768, num_classes)
def forward(self, x):
x = self.features(x)
x = self.classifier(x.view(x.size(0),-1))
return x
class NetworkCIFAR(nn.Module):
"""CIFAR network (ported from DARTS)."""
def __init__(self, C, num_classes, layers, auxiliary, genotype):
super(NetworkCIFAR, self).__init__()
self._layers = layers
self._auxiliary = auxiliary
stem_multiplier = 3
C_curr = stem_multiplier*C
self.stem = nn.Sequential(
nn.Conv2d(cfg.MODEL.INPUT_CHANNELS, C_curr, 3, padding=1, bias=False),
nn.BatchNorm2d(C_curr)
)
C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
self.cells = nn.ModuleList()
reduction_prev = False
for i in range(layers):
if i in [layers//3, 2*layers//3]:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
if i == 2*layers//3:
C_to_auxiliary = C_prev
if auxiliary:
self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
self.classifier = Classifier(C_prev, num_classes)
def forward(self, input):
input = Preprocess(input)
logits_aux = None
s0 = s1 = self.stem(input)
for i, cell in enumerate(self.cells):
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
if i == 2*self._layers//3:
if self._auxiliary and self.training:
logits_aux = self.auxiliary_head(s1)
logits = self.classifier(s1, input.shape[2:])
if self._auxiliary and self.training:
return logits, logits_aux
return logits
class NetworkImageNet(nn.Module):
"""ImageNet network (ported from DARTS)."""
def __init__(self, C, num_classes, layers, auxiliary, genotype):
super(NetworkImageNet, self).__init__()
self._layers = layers
self._auxiliary = auxiliary
self.stem0 = nn.Sequential(
nn.Conv2d(cfg.MODEL.INPUT_CHANNELS, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C // 2),
nn.ReLU(inplace=True),
nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)
self.stem1 = nn.Sequential(
nn.ReLU(inplace=True),
nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)
C_prev_prev, C_prev, C_curr = C, C, C
self.cells = nn.ModuleList()
reduction_prev = True
reduction_layers = [layers//3] if cfg.TASK == 'seg' else [layers//3, 2*layers//3]
for i in range(layers):
if i in reduction_layers:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
if i == 2 * layers // 3:
C_to_auxiliary = C_prev
if auxiliary:
self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
self.classifier = Classifier(C_prev, num_classes)
def forward(self, input):
input = Preprocess(input)
logits_aux = None
s0 = self.stem0(input)
s1 = self.stem1(s0)
for i, cell in enumerate(self.cells):
s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
if i == 2 * self._layers // 3:
if self._auxiliary and self.training:
logits_aux = self.auxiliary_head(s1)
logits = self.classifier(s1, input.shape[2:])
if self._auxiliary and self.training:
return logits, logits_aux
return logits
class NAS(nn.Module):
"""NAS net wrapper (delegates to nets from DARTS)."""
def __init__(self):
assert cfg.TRAIN.DATASET in ['cifar10', 'imagenet', 'cityscapes'], \
'Training on {} is not supported'.format(cfg.TRAIN.DATASET)
assert cfg.TEST.DATASET in ['cifar10', 'imagenet', 'cityscapes'], \
'Testing on {} is not supported'.format(cfg.TEST.DATASET)
assert cfg.NAS.GENOTYPE in GENOTYPES, \
'Genotype {} not supported'.format(cfg.NAS.GENOTYPE)
super(NAS, self).__init__()
logger.info('Constructing NAS: {}'.format(cfg.NAS))
# Use a custom or predefined genotype
if cfg.NAS.GENOTYPE == 'custom':
genotype = Genotype(
normal=cfg.NAS.CUSTOM_GENOTYPE[0],
normal_concat=cfg.NAS.CUSTOM_GENOTYPE[1],
reduce=cfg.NAS.CUSTOM_GENOTYPE[2],
reduce_concat=cfg.NAS.CUSTOM_GENOTYPE[3],
)
else:
genotype = GENOTYPES[cfg.NAS.GENOTYPE]
# Determine the network constructor for dataset
if 'cifar' in cfg.TRAIN.DATASET:
net_ctor = NetworkCIFAR
else:
net_ctor = NetworkImageNet
# Construct the network
self.net_ = net_ctor(
C=cfg.NAS.WIDTH,
num_classes=cfg.MODEL.NUM_CLASSES,
layers=cfg.NAS.DEPTH,
auxiliary=cfg.NAS.AUX,
genotype=genotype
)
# Drop path probability (set / annealed based on epoch)
self.net_.drop_path_prob = 0.0
def set_drop_path_prob(self, drop_path_prob):
self.net_.drop_path_prob = drop_path_prob
def forward(self, x):
return self.net_.forward(x)

View File

@@ -0,0 +1,201 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""NAS ops (adopted from DARTS)."""
import torch
import torch.nn as nn
OPS = {
'none': lambda C, stride, affine:
Zero(stride),
'avg_pool_2x2': lambda C, stride, affine:
nn.AvgPool2d(2, stride=stride, padding=0, count_include_pad=False),
'avg_pool_3x3': lambda C, stride, affine:
nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
'avg_pool_5x5': lambda C, stride, affine:
nn.AvgPool2d(5, stride=stride, padding=2, count_include_pad=False),
'max_pool_2x2': lambda C, stride, affine:
nn.MaxPool2d(2, stride=stride, padding=0),
'max_pool_3x3': lambda C, stride, affine:
nn.MaxPool2d(3, stride=stride, padding=1),
'max_pool_5x5': lambda C, stride, affine:
nn.MaxPool2d(5, stride=stride, padding=2),
'max_pool_7x7': lambda C, stride, affine:
nn.MaxPool2d(7, stride=stride, padding=3),
'skip_connect': lambda C, stride, affine:
Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
'conv_1x1': lambda C, stride, affine:
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 1, stride=stride, padding=0, bias=False),
nn.BatchNorm2d(C, affine=affine)
),
'conv_3x3': lambda C, stride, affine:
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, 3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(C, affine=affine)
),
'sep_conv_3x3': lambda C, stride, affine:
SepConv(C, C, 3, stride, 1, affine=affine),
'sep_conv_5x5': lambda C, stride, affine:
SepConv(C, C, 5, stride, 2, affine=affine),
'sep_conv_7x7': lambda C, stride, affine:
SepConv(C, C, 7, stride, 3, affine=affine),
'dil_conv_3x3': lambda C, stride, affine:
DilConv(C, C, 3, stride, 2, 2, affine=affine),
'dil_conv_5x5': lambda C, stride, affine:
DilConv(C, C, 5, stride, 4, 2, affine=affine),
'dil_sep_conv_3x3': lambda C, stride, affine:
DilSepConv(C, C, 3, stride, 2, 2, affine=affine),
'conv_3x1_1x3': lambda C, stride, affine:
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, (1,3), stride=(1, stride), padding=(0, 1), bias=False),
nn.Conv2d(C, C, (3,1), stride=(stride, 1), padding=(1, 0), bias=False),
nn.BatchNorm2d(C, affine=affine)
),
'conv_7x1_1x7': lambda C, stride, affine:
nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
nn.BatchNorm2d(C, affine=affine)
),
}
class ReLUConvBN(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(ReLUConvBN, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_out, kernel_size, stride=stride,
padding=padding, bias=False
),
nn.BatchNorm2d(C_out, affine=affine)
)
def forward(self, x):
return self.op(x)
class DilConv(nn.Module):
def __init__(
self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True
):
super(DilConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_in, kernel_size=kernel_size, stride=stride,
padding=padding, dilation=dilation, groups=C_in, bias=False
),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine),
)
def forward(self, x):
return self.op(x)
class SepConv(nn.Module):
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
super(SepConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_in, kernel_size=kernel_size, stride=stride,
padding=padding, groups=C_in, bias=False
),
nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_in, affine=affine),
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_in, kernel_size=kernel_size, stride=1,
padding=padding, groups=C_in, bias=False
),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine),
)
def forward(self, x):
return self.op(x)
class DilSepConv(nn.Module):
def __init__(
self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True
):
super(DilSepConv, self).__init__()
self.op = nn.Sequential(
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_in, kernel_size=kernel_size, stride=stride,
padding=padding, dilation=dilation, groups=C_in, bias=False
),
nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_in, affine=affine),
nn.ReLU(inplace=False),
nn.Conv2d(
C_in, C_in, kernel_size=kernel_size, stride=1,
padding=padding, dilation=dilation, groups=C_in, bias=False
),
nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
nn.BatchNorm2d(C_out, affine=affine),
)
def forward(self, x):
return self.op(x)
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
class Zero(nn.Module):
def __init__(self, stride):
super(Zero, self).__init__()
self.stride = stride
def forward(self, x):
if self.stride == 1:
return x.mul(0.)
return x[:,:,::self.stride,::self.stride].mul(0.)
class FactorizedReduce(nn.Module):
def __init__(self, C_in, C_out, affine=True):
super(FactorizedReduce, self).__init__()
assert C_out % 2 == 0
self.relu = nn.ReLU(inplace=False)
self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
self.bn = nn.BatchNorm2d(C_out, affine=affine)
self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
def forward(self, x):
x = self.relu(x)
y = self.pad(x)
out = torch.cat([self.conv_1(x), self.conv_2(y[:,:,1:,1:])], dim=1)
out = self.bn(out)
return out
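A quick shape check for the stride-2 skip connection, assuming OPS and FactorizedReduce above are in scope: the two 1x1 convolutions each produce C_out // 2 channels at half resolution, and the padded, one-pixel-shifted branch lets the reduction see both pixel phases.

import torch

op = OPS['skip_connect'](16, 2, True)  # stride 2 dispatches to FactorizedReduce(16, 16)
x = torch.randn(1, 16, 32, 32)
print(op(x).shape)                     # torch.Size([1, 16, 16, 16])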

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""RegNet models."""
import numpy as np
from pycls.core.config import cfg
from pycls.models.anynet import AnyNet
def quantize_float(f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
def adjust_ws_gs_comp(ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
def get_stages_from_blocks(ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
def generate_regnet(w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
class RegNet(AnyNet):
"""RegNet model."""
@staticmethod
def get_args():
"""Convert RegNet to AnyNet parameter format."""
# Generate RegNet ws per block
w_a, w_0, w_m, d = cfg.REGNET.WA, cfg.REGNET.W0, cfg.REGNET.WM, cfg.REGNET.DEPTH
ws, num_stages, _, _ = generate_regnet(w_a, w_0, w_m, d)
# Convert to per stage format
s_ws, s_ds = get_stages_from_blocks(ws, ws)
# Use the same gw, bm and ss for each stage
s_gs = [cfg.REGNET.GROUP_W for _ in range(num_stages)]
s_bs = [cfg.REGNET.BOT_MUL for _ in range(num_stages)]
s_ss = [cfg.REGNET.STRIDE for _ in range(num_stages)]
# Adjust the compatibility of ws and gws
s_ws, s_gs = adjust_ws_gs_comp(s_ws, s_bs, s_gs)
# Get AnyNet arguments defining the RegNet
return {
"stem_type": cfg.REGNET.STEM_TYPE,
"stem_w": cfg.REGNET.STEM_W,
"block_type": cfg.REGNET.BLOCK_TYPE,
"ds": s_ds,
"ws": s_ws,
"ss": s_ss,
"bms": s_bs,
"gws": s_gs,
"se_r": cfg.REGNET.SE_R if cfg.REGNET.SE_ON else None,
"nc": cfg.MODEL.NUM_CLASSES,
}
def __init__(self):
kwargs = RegNet.get_args()
super(RegNet, self).__init__(**kwargs)
@staticmethod
def complexity(cx, **kwargs):
"""Computes model complexity. If you alter the model, make sure to update."""
kwargs = RegNet.get_args() if not kwargs else kwargs
return AnyNet.complexity(cx, **kwargs)
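A standalone check of the width generation above, using assumed RegNet parameters (they are not read from any config in this repo); it mirrors generate_regnet's quantization of the continuous widths to multiples of q.

import numpy as np

w_a, w_0, w_m, d, q = 36.44, 24, 2.49, 13, 8        # assumed example parameters
ws_cont = np.arange(d) * w_a + w_0                  # continuous per-block widths
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))  # per-block quantization exponents
ws = np.round(w_0 * np.power(w_m, ks) / q) * q      # quantized widths, multiples of q
print(ws.astype(int).tolist(), len(np.unique(ws)))  # per-block widths and the stage count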

View File

@@ -0,0 +1,280 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""ResNe(X)t models."""
import pycls.core.net as net
import torch.nn as nn
from pycls.core.config import cfg
# Stage depths for ImageNet models
_IN_STAGE_DS = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)}
def get_trans_fun(name):
"""Retrieves the transformation function by name."""
trans_funs = {
"basic_transform": BasicTransform,
"bottleneck_transform": BottleneckTransform,
}
err_str = "Transformation function '{}' not supported"
assert name in trans_funs.keys(), err_str.format(name)
return trans_funs[name]
class ResHead(nn.Module):
"""ResNet head: AvgPool, 1x1."""
def __init__(self, w_in, nc):
super(ResHead, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(w_in, nc, bias=True)
def forward(self, x):
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
@staticmethod
def complexity(cx, w_in, nc):
cx["h"], cx["w"] = 1, 1
cx = net.complexity_conv2d(cx, w_in, nc, 1, 1, 0, bias=True)
return cx
class BasicTransform(nn.Module):
"""Basic transformation: 3x3, BN, ReLU, 3x3, BN."""
def __init__(self, w_in, w_out, stride, w_b=None, num_gs=1):
err_str = "Basic transform does not support w_b and num_gs options"
assert w_b is None and num_gs == 1, err_str
super(BasicTransform, self).__init__()
self.a = nn.Conv2d(w_in, w_out, 3, stride=stride, padding=1, bias=False)
self.a_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.a_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.b = nn.Conv2d(w_out, w_out, 3, stride=1, padding=1, bias=False)
self.b_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.b_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, w_b=None, num_gs=1):
err_str = "Basic transform does not support w_b and num_gs options"
assert w_b is None and num_gs == 1, err_str
cx = net.complexity_conv2d(cx, w_in, w_out, 3, stride, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
cx = net.complexity_conv2d(cx, w_out, w_out, 3, 1, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class BottleneckTransform(nn.Module):
"""Bottleneck transformation: 1x1, BN, ReLU, 3x3, BN, ReLU, 1x1, BN."""
def __init__(self, w_in, w_out, stride, w_b, num_gs):
super(BottleneckTransform, self).__init__()
# MSRA -> stride=2 is on 1x1; TH/C2 -> stride=2 is on 3x3
(s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride)
self.a = nn.Conv2d(w_in, w_b, 1, stride=s1, padding=0, bias=False)
self.a_bn = nn.BatchNorm2d(w_b, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.a_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.b = nn.Conv2d(w_b, w_b, 3, stride=s3, padding=1, groups=num_gs, bias=False)
self.b_bn = nn.BatchNorm2d(w_b, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.b_relu = nn.ReLU(inplace=cfg.MEM.RELU_INPLACE)
self.c = nn.Conv2d(w_b, w_out, 1, stride=1, padding=0, bias=False)
self.c_bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.c_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, w_b, num_gs):
(s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride)
cx = net.complexity_conv2d(cx, w_in, w_b, 1, s1, 0)
cx = net.complexity_batchnorm2d(cx, w_b)
cx = net.complexity_conv2d(cx, w_b, w_b, 3, s3, 1, num_gs)
cx = net.complexity_batchnorm2d(cx, w_b)
cx = net.complexity_conv2d(cx, w_b, w_out, 1, 1, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class ResBlock(nn.Module):
"""Residual block: x + F(x)."""
def __init__(self, w_in, w_out, stride, trans_fun, w_b=None, num_gs=1):
super(ResBlock, self).__init__()
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self.proj = nn.Conv2d(w_in, w_out, 1, stride=stride, padding=0, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.f = trans_fun(w_in, w_out, stride, w_b, num_gs)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, trans_fun, w_b, num_gs):
proj_block = (w_in != w_out) or (stride != 1)
if proj_block:
h, w = cx["h"], cx["w"]
cx = net.complexity_conv2d(cx, w_in, w_out, 1, stride, 0)
cx = net.complexity_batchnorm2d(cx, w_out)
cx["h"], cx["w"] = h, w # parallel branch
cx = trans_fun.complexity(cx, w_in, w_out, stride, w_b, num_gs)
return cx
class ResStage(nn.Module):
"""Stage of ResNet."""
def __init__(self, w_in, w_out, stride, d, w_b=None, num_gs=1):
super(ResStage, self).__init__()
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
trans_fun = get_trans_fun(cfg.RESNET.TRANS_FUN)
res_block = ResBlock(b_w_in, w_out, b_stride, trans_fun, w_b, num_gs)
self.add_module("b{}".format(i + 1), res_block)
def forward(self, x):
for block in self.children():
x = block(x)
return x
@staticmethod
def complexity(cx, w_in, w_out, stride, d, w_b=None, num_gs=1):
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
trans_f = get_trans_fun(cfg.RESNET.TRANS_FUN)
cx = ResBlock.complexity(cx, b_w_in, w_out, b_stride, trans_f, w_b, num_gs)
return cx
class ResStemCifar(nn.Module):
"""ResNet stem for CIFAR: 3x3, BN, ReLU."""
def __init__(self, w_in, w_out):
super(ResStemCifar, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=1, padding=1, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 3, 1, 1)
cx = net.complexity_batchnorm2d(cx, w_out)
return cx
class ResStemIN(nn.Module):
"""ResNet stem for ImageNet: 7x7, BN, ReLU, MaxPool."""
def __init__(self, w_in, w_out):
super(ResStemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 7, stride=2, padding=3, bias=False)
self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
self.pool = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
@staticmethod
def complexity(cx, w_in, w_out):
cx = net.complexity_conv2d(cx, w_in, w_out, 7, 2, 3)
cx = net.complexity_batchnorm2d(cx, w_out)
cx = net.complexity_maxpool2d(cx, 3, 2, 1)
return cx
class ResNet(nn.Module):
"""ResNet model."""
def __init__(self):
datasets = ["cifar10", "imagenet"]
err_str = "Dataset {} is not supported"
assert cfg.TRAIN.DATASET in datasets, err_str.format(cfg.TRAIN.DATASET)
assert cfg.TEST.DATASET in datasets, err_str.format(cfg.TEST.DATASET)
super(ResNet, self).__init__()
if "cifar" in cfg.TRAIN.DATASET:
self._construct_cifar()
else:
self._construct_imagenet()
self.apply(net.init_weights)
def _construct_cifar(self):
err_str = "Model depth should be of the format 6n + 2 for cifar"
assert (cfg.MODEL.DEPTH - 2) % 6 == 0, err_str
d = int((cfg.MODEL.DEPTH - 2) / 6)
self.stem = ResStemCifar(3, 16)
self.s1 = ResStage(16, 16, stride=1, d=d)
self.s2 = ResStage(16, 32, stride=2, d=d)
self.s3 = ResStage(32, 64, stride=2, d=d)
self.head = ResHead(64, nc=cfg.MODEL.NUM_CLASSES)
def _construct_imagenet(self):
g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP
(d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH]
w_b = gw * g
self.stem = ResStemIN(3, 64)
self.s1 = ResStage(64, 256, stride=1, d=d1, w_b=w_b, num_gs=g)
self.s2 = ResStage(256, 512, stride=2, d=d2, w_b=w_b * 2, num_gs=g)
self.s3 = ResStage(512, 1024, stride=2, d=d3, w_b=w_b * 4, num_gs=g)
self.s4 = ResStage(1024, 2048, stride=2, d=d4, w_b=w_b * 8, num_gs=g)
self.head = ResHead(2048, nc=cfg.MODEL.NUM_CLASSES)
def forward(self, x):
for module in self.children():
x = module(x)
return x
@staticmethod
def complexity(cx):
"""Computes model complexity. If you alter the model, make sure to update."""
if "cifar" in cfg.TRAIN.DATASET:
d = int((cfg.MODEL.DEPTH - 2) / 6)
cx = ResStemCifar.complexity(cx, 3, 16)
cx = ResStage.complexity(cx, 16, 16, stride=1, d=d)
cx = ResStage.complexity(cx, 16, 32, stride=2, d=d)
cx = ResStage.complexity(cx, 32, 64, stride=2, d=d)
cx = ResHead.complexity(cx, 64, nc=cfg.MODEL.NUM_CLASSES)
else:
g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP
(d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH]
w_b = gw * g
cx = ResStemIN.complexity(cx, 3, 64)
cx = ResStage.complexity(cx, 64, 256, 1, d=d1, w_b=w_b, num_gs=g)
cx = ResStage.complexity(cx, 256, 512, 2, d=d2, w_b=w_b * 2, num_gs=g)
cx = ResStage.complexity(cx, 512, 1024, 2, d=d3, w_b=w_b * 4, num_gs=g)
cx = ResStage.complexity(cx, 1024, 2048, 2, d=d4, w_b=w_b * 8, num_gs=g)
cx = ResHead.complexity(cx, 2048, nc=cfg.MODEL.NUM_CLASSES)
return cx
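A tiny arithmetic check of the CIFAR depth rule enforced above: the depth must be of the form 6n + 2, so for an assumed depth of 56 each of the three stages (s1, s2, s3) receives n = 9 basic blocks.

depth = 56  # assumed example depth
assert (depth - 2) % 6 == 0, "Model depth should be of the format 6n + 2 for cifar"
print((depth - 2) // 6)  # 9 blocks per stage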

View File

@@ -0,0 +1,304 @@
import argparse
import nasspace
import datasets
import random
import numpy as np
import torch
import os
from scores import get_score_func
from scipy import stats
import time
# from pycls.models.nas.nas import Cell
from utils import add_dropout, init_network
parser = argparse.ArgumentParser(description='NAS Without Training')
parser.add_argument('--data_loc', default='../cifardata/', type=str, help='dataset folder')
parser.add_argument('--api_loc', default='../NAS-Bench-201-v1_0-e61699.pth',
type=str, help='path to API')
parser.add_argument('--save_loc', default='results', type=str, help='folder to save results')
parser.add_argument('--save_string', default='naswot', type=str, help='prefix of results file')
parser.add_argument('--score', default='hook_logdet', type=str, help='the score to evaluate')
parser.add_argument('--nasspace', default='nasbench201', type=str, help='the nas search space to use')
parser.add_argument('--batch_size', default=128, type=int)
parser.add_argument('--repeat', default=1, type=int, help='how often to repeat a single image with a batch')
parser.add_argument('--augtype', default='none', type=str, help='which perturbations to use')
parser.add_argument('--sigma', default=0.05, type=float, help='noise level if augtype is "gaussnoise"')
parser.add_argument('--GPU', default='0', type=str)
parser.add_argument('--seed', default=1, type=int)
parser.add_argument('--init', default='', type=str)
parser.add_argument('--trainval', action='store_true')
parser.add_argument('--dropout', action='store_true')
parser.add_argument('--dataset', default='cifar10', type=str)
parser.add_argument('--maxofn', default=1, type=int, help='score is the max of this many evaluations of the network')
parser.add_argument('--n_samples', default=100, type=int)
parser.add_argument('--n_runs', default=500, type=int)
parser.add_argument('--stem_out_channels', default=16, type=int, help='output channels of stem convolution (nasbench101)')
parser.add_argument('--num_stacks', default=3, type=int, help='#stacks of modules (nasbench101)')
parser.add_argument('--num_modules_per_stack', default=3, type=int, help='#modules per stack (nasbench101)')
parser.add_argument('--num_labels', default=1, type=int, help='#classes (nasbench101)')
args = parser.parse_args()
os.environ['CUDA_VISIBLE_DEVICES'] = args.GPU
# Reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
def get_batch_jacobian(net, x, target, device, args=None):
net.zero_grad()
x.requires_grad_(True)
y, out = net(x)
y.backward(torch.ones_like(y))
jacob = x.grad.detach()
return jacob, target.detach(), y.detach(), out.detach()
def get_nasbench201_idx_score(idx, train_loader, searchspace, args):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# searchspace = nasspace.get_search_space(args)
if 'valid' in args.dataset:
args.dataset = args.dataset.replace('-valid', '')
# train_loader = datasets.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
# os.makedirs(args.save_loc, exist_ok=True)
# filename = f'{args.save_loc}/{args.save_string}_{args.score}_{args.nasspace}_{args.dataset}{"_" + args.init + "_" if args.init != "" else args.init}_{"_dropout" if args.dropout else ""}_{args.augtype}_{args.sigma}_{args.repeat}_{args.trainval}_{args.batch_size}_{args.maxofn}_{args.seed}'
# accfilename = f'{args.save_loc}/{args.save_string}_accs_{args.nasspace}_{args.dataset}_{args.trainval}'
# scores = np.zeros(len(searchspace))
# accs = np.zeros(len(searchspace))
i = idx
uid = idx
print(f'uid: {uid}')
print(f'get network')
network = searchspace.get_network(uid)
print(f'get network done')
try:
if args.dropout:
add_dropout(network, args.sigma)
if args.init != '':
init_network(network, args.init)
if 'hook_' in args.score:
network.K = np.zeros((args.batch_size, args.batch_size))
def counting_forward_hook(module, inp, out):
try:
if not module.visited_backwards:
return
if isinstance(inp, tuple):
# print(len(inp))
inp = inp[0]
inp = inp.view(inp.size(0), -1)
x = (inp > 0).float()
K = x @ x.t()
K2 = (1.-x) @ (1.-x.t())
network.K = network.K + K.cpu().numpy() + K2.cpu().numpy()
except:
pass
def counting_backward_hook(module, inp, out):
module.visited_backwards = True
for name, module in network.named_modules():
if 'ReLU' in str(type(module)):
#hooks[name] = module.register_forward_hook(counting_hook)
module.register_forward_hook(counting_forward_hook)
module.register_backward_hook(counting_backward_hook)
network = network.to(device)
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
s = []
for j in range(args.maxofn):
data_iterator = iter(train_loader)
x, target = next(data_iterator)
x2 = torch.clone(x)
x2 = x2.to(device)
x, target = x.to(device), target.to(device)
jacobs, labels, y, out = get_batch_jacobian(network, x, target, device, args)
if 'hook_' in args.score:
network(x2.to(device))
s.append(get_score_func(args.score)(network.K, target))
else:
s.append(get_score_func(args.score)(jacobs, labels))
return np.mean(s)
        # NOTE: the statements below are unreachable (the function returns above) and are
        # leftovers from the original whole-search-space loop, which is kept, commented out,
        # at the bottom of this file; scores, accs, acc_type, filename and accfilename are
        # not defined inside this function.
        # scores[i] = np.mean(s)
        # accs[i] = searchspace.get_final_accuracy(uid, acc_type, args.trainval)
        # accs_ = accs[~np.isnan(scores)]
        # scores_ = scores[~np.isnan(scores)]
        # numnan = np.isnan(scores).sum()
        # tau, p = stats.kendalltau(accs_[:max(i-numnan, 1)], scores_[:max(i-numnan, 1)])
        # print(f'{tau}')
        # if i % 1000 == 0:
        #     np.save(filename, scores)
        #     np.save(accfilename, accs)
    except Exception as e:
        print(f'failed to score network uid {uid}: {e}')
        return np.nan
# NOTE: this hard-coded Args object replaces the argparse namespace parsed at the top of
# the script, so command-line flags are ignored from this point on.
class Args:
    pass
args = Args()
args.trainval = True
args.augtype = 'none'
args.repeat = 1
args.score = 'hook_logdet'
args.sigma = 0.05
args.nasspace = 'nasbench201'
args.batch_size = 128
args.GPU = '0'
args.dataset = 'cifar10-valid'
args.api_loc = '/home/stud/hanzhang/nasbenchDiT/graph_dit/NAS-Bench-201-v1_1-096897.pth'
args.data_loc = '../cifardata/'
args.seed = 777
args.init = ''
args.save_loc = 'results'
args.save_string = 'naswot'
args.dropout = False
args.maxofn = 1
args.n_samples = 100
args.n_runs = 500
args.stem_out_channels = 16
args.num_stacks = 3
args.num_modules_per_stack = 3
args.num_labels = 1
if 'valid' in args.dataset:
args.dataset = args.dataset.replace('-valid', '')
print('start to get search space')
start_time = time.time()
searchspace = nasspace.get_search_space(args)
end_time = time.time()
print(f'search space time: {end_time - start_time}')
train_loader = datasets.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
print('start to get score')
print('5374')
start_time = time.time()
print(get_nasbench201_idx_score(5374,train_loader=train_loader, searchspace=searchspace, args=args))
end_time = time.time()
print(f'5374 time: {end_time - start_time}')
print('5375')
start_time = time.time()
print(get_nasbench201_idx_score(5375,train_loader=train_loader, searchspace=searchspace, args=args))
end_time = time.time()
print(f'5375 time: {end_time - start_time}')
print('5376')
start_time = time.time()
print(get_nasbench201_idx_score(5376,train_loader=train_loader, searchspace=searchspace, args=args))
end_time = time.time()
print(f'5376 time: {end_time - start_time}')
# device = "cuda:0"
# dataset = dataset
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# savedataset = args.dataset
# dataset = 'fake' if 'fake' in args.dataset else args.dataset
# args.dataset = args.dataset.replace('fake', '')
# if args.dataset == 'cifar10':
# args.dataset = args.dataset + '-valid'
# searchspace = nasspace.get_search_space(args)
# if 'valid' in args.dataset:
# args.dataset = args.dataset.replace('-valid', '')
# train_loader = datasets.get_data(args.dataset, args.data_loc, args.trainval, args.batch_size, args.augtype, args.repeat, args)
# os.makedirs(args.save_loc, exist_ok=True)
# filename = f'{args.save_loc}/{args.save_string}_{args.score}_{args.nasspace}_{savedataset}{"_" + args.init + "_" if args.init != "" else args.init}_{"_dropout" if args.dropout else ""}_{args.augtype}_{args.sigma}_{args.repeat}_{args.trainval}_{args.batch_size}_{args.maxofn}_{args.seed}'
# accfilename = f'{args.save_loc}/{args.save_string}_accs_{args.nasspace}_{savedataset}_{args.trainval}'
# if args.dataset == 'cifar10':
# acc_type = 'ori-test'
# val_acc_type = 'x-valid'
# else:
# acc_type = 'x-test'
# val_acc_type = 'x-valid'
# scores = np.zeros(len(searchspace))
# try:
# accs = np.load(accfilename + '.npy')
# except:
# accs = np.zeros(len(searchspace))
# for i, (uid, network) in enumerate(searchspace):
# # Reproducibility
# try:
# if args.dropout:
# add_dropout(network, args.sigma)
# if args.init != '':
# init_network(network, args.init)
# if 'hook_' in args.score:
# network.K = np.zeros((args.batch_size, args.batch_size))
# def counting_forward_hook(module, inp, out):
# try:
# if not module.visited_backwards:
# return
# if isinstance(inp, tuple):
# print(len(inp))
# inp = inp[0]
# inp = inp.view(inp.size(0), -1)
# x = (inp > 0).float()
# K = x @ x.t()
# K2 = (1.-x) @ (1.-x.t())
# network.K = network.K + K.cpu().numpy() + K2.cpu().numpy()
# except:
# pass
# def counting_backward_hook(module, inp, out):
# module.visited_backwards = True
# for name, module in network.named_modules():
# if 'ReLU' in str(type(module)):
# #hooks[name] = module.register_forward_hook(counting_hook)
# module.register_forward_hook(counting_forward_hook)
# module.register_backward_hook(counting_backward_hook)
# network = network.to(device)
# random.seed(args.seed)
# np.random.seed(args.seed)
# torch.manual_seed(args.seed)
# s = []
# for j in range(args.maxofn):
# data_iterator = iter(train_loader)
# x, target = next(data_iterator)
# x2 = torch.clone(x)
# x2 = x2.to(device)
# x, target = x.to(device), target.to(device)
# jacobs, labels, y, out = get_batch_jacobian(network, x, target, device, args)
# if 'hook_' in args.score:
# network(x2.to(device))
# s.append(get_score_func(args.score)(network.K, target))
# else:
# s.append(get_score_func(args.score)(jacobs, labels))
# scores[i] = np.mean(s)
# accs[i] = searchspace.get_final_accuracy(uid, acc_type, args.trainval)
# accs_ = accs[~np.isnan(scores)]
# scores_ = scores[~np.isnan(scores)]
# numnan = np.isnan(scores).sum()
# tau, p = stats.kendalltau(accs_[:max(i-numnan, 1)], scores_[:max(i-numnan, 1)])
# print(f'{tau}')
# if i % 1000 == 0:
# np.save(filename, scores)
# np.save(accfilename, accs)
# except Exception as e:
# print(e)
# accs[i] = searchspace.get_final_accuracy(uid, acc_type, args.trainval)
# scores[i] = np.nan
# np.save(filename, scores)
# np.save(accfilename, accs)
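A self-contained sketch of the kernel that counting_forward_hook accumulates above: binary ReLU activation codes (here faked from a random matrix instead of real post-ReLU features) are compared across the batch, and the score is the log-determinant of K, matching hooklogdet in the scores file that follows.

import numpy as np

rng = np.random.default_rng(0)
acts = rng.standard_normal((8, 64))      # stand-in pre-activations for a batch of 8
x = (acts > 0).astype(np.float64)        # binary activation codes, as in the hook
K = x @ x.T + (1. - x) @ (1. - x).T      # counts of units on which two inputs agree
sign, logdet = np.linalg.slogdet(K)
print(logdet)                            # the hook_logdet score for this toy batch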

View File

@@ -0,0 +1,21 @@
import numpy as np
import torch
def hooklogdet(K, labels=None):
s, ld = np.linalg.slogdet(K)
return ld
def random_score(jacob, label=None):
return np.random.normal()
_scores = {
'hook_logdet': hooklogdet,
'random': random_score
}
def get_score_func(score_name):
return _scores[score_name]
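A short usage sketch for the dispatch table above, with a toy positive-definite kernel; 'hook_logdet' ignores its labels argument and 'random' is the label-free sanity baseline (this assumes the definitions above are in scope).

import numpy as np

K = np.eye(4) + 0.1                      # toy kernel: 1.1 on the diagonal, 0.1 elsewhere
print(get_score_func('hook_logdet')(K))  # log-determinant of K
print(get_score_func('random')(None))    # draws from a standard normal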

graph_dit/naswot/utils.py Normal file
View File

@@ -0,0 +1,100 @@
import torch
from pycls.models.nas.nas import Cell
class DropChannel(torch.nn.Module):
def __init__(self, p, mod):
super(DropChannel, self).__init__()
self.mod = mod
self.p = p
def forward(self, s0, s1, droppath):
ret = self.mod(s0, s1, droppath)
return ret
class DropConnect(torch.nn.Module):
def __init__(self, p):
super(DropConnect, self).__init__()
self.p = p
def forward(self, inputs):
batch_size = inputs.shape[0]
dim1 = inputs.shape[2]
dim2 = inputs.shape[3]
channel_size = inputs.shape[1]
keep_prob = 1 - self.p
# generate binary_tensor mask according to probability (p for 0, 1-p for 1)
random_tensor = keep_prob
random_tensor += torch.rand([batch_size, channel_size, 1, 1], dtype=inputs.dtype, device=inputs.device)
binary_tensor = torch.floor(random_tensor)
output = inputs / keep_prob * binary_tensor
return output
def add_dropout(network, p, prefix=''):
#p = 0.5
for attr_str in dir(network):
target_attr = getattr(network, attr_str)
if isinstance(target_attr, torch.nn.Conv2d):
setattr(network, attr_str, torch.nn.Sequential(target_attr, DropConnect(p)))
elif isinstance(target_attr, Cell):
setattr(network, attr_str, DropChannel(p, target_attr))
for n, ch in list(network.named_children()):
#print(f'{prefix}add_dropout {n}')
if isinstance(ch, torch.nn.Conv2d):
setattr(network, n, torch.nn.Sequential(ch, DropConnect(p)))
elif isinstance(ch, Cell):
setattr(network, n, DropChannel(p, ch))
else:
add_dropout(ch, p, prefix + '\t')
def orth_init(m):
if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
torch.nn.init.orthogonal_(m.weight)
def uni_init(m):
if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
torch.nn.init.uniform_(m.weight)
def uni2_init(m):
if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
torch.nn.init.uniform_(m.weight, -1., 1.)
def uni3_init(m):
if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
torch.nn.init.uniform_(m.weight, -.5, .5)
def norm_init(m):
if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        torch.nn.init.normal_(m.weight)
def eye_init(m):
if isinstance(m, torch.nn.Linear):
torch.nn.init.eye_(m.weight)
elif isinstance(m, torch.nn.Conv2d):
torch.nn.init.dirac_(m.weight)
def fixup_init(m):
if isinstance(m, torch.nn.Conv2d):
        torch.nn.init.zeros_(m.weight)
elif isinstance(m, torch.nn.Linear):
        torch.nn.init.zeros_(m.weight)
        torch.nn.init.zeros_(m.bias)
def init_network(network, init):
if init == 'orthogonal':
network.apply(orth_init)
elif init == 'uniform':
print('uniform')
network.apply(uni_init)
elif init == 'uniform2':
network.apply(uni2_init)
elif init == 'uniform3':
network.apply(uni3_init)
elif init == 'normal':
network.apply(norm_init)
elif init == 'identity':
network.apply(eye_init)