add autodl
36
AutoDL-Projects/xautodl/xmisc/__init__.py
Normal file
@@ -0,0 +1,36 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
"""The module and yaml related functions."""
from .module_utils import call_by_dict
from .module_utils import call_by_yaml
from .module_utils import nested_call_by_dict
from .module_utils import nested_call_by_yaml
from .yaml_utils import load_yaml

from .torch_utils import count_parameters

from .logger_utils import Logger

"""The data sampler related classes."""
from .sampler_utils import BatchSampler

"""The meter related classes."""
from .meter_utils import AverageMeter

"""The scheduler related classes."""
from .scheduler_utils import CosineParamScheduler, WarmupParamScheduler, LRMultiplier


def get_scheduler(indicator, lr):
    if indicator == "warm-cos":
        multiplier = WarmupParamScheduler(
            CosineParamScheduler(lr, lr * 1e-3),
            warmup_factor=0.001,
            warmup_length=0.05,
            warmup_method="linear",
        )
    else:
        raise ValueError("Unknown indicator: {:}".format(indicator))
    return multiplier
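As a quick sanity check, here is a minimal usage sketch that wires get_scheduler into a training loop (not part of the committed file; the model, optimizer, and max_iter are placeholders for illustration):

import torch
from xautodl.xmisc import get_scheduler, LRMultiplier

model = torch.nn.Linear(16, 10)                          # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

multiplier = get_scheduler("warm-cos", lr=1.0)           # warmup followed by cosine decay
scheduler = LRMultiplier(optimizer, multiplier, max_iter=100)

for step in range(100):
    optimizer.step()
    scheduler.step()   # lr becomes base_lr * multiplier(progress)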
49
AutoDL-Projects/xautodl/xmisc/logger_utils.py
Normal file
@@ -0,0 +1,49 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import sys
from pathlib import Path

from .time_utils import time_for_file, time_string


class Logger:
    """A logger used in xautodl."""

    def __init__(self, root_dir, prefix="", log_time=True):
        """Create a text logger that writes to `root_dir/logs`."""
        self.root_dir = Path(root_dir)
        self.log_dir = self.root_dir / "logs"
        self.log_dir.mkdir(parents=True, exist_ok=True)

        self._prefix = prefix
        self._log_time = log_time
        self.logger_path = self.log_dir / "{:}{:}.log".format(
            self._prefix, time_for_file()
        )
        self._logger_file = open(self.logger_path, "w")

    @property
    def logger(self):
        return self._logger_file

    def log(self, string, save=True, stdout=False):
        string = "{:} {:}".format(time_string(), string) if self._log_time else string
        if stdout:
            # Write the raw string without a trailing newline.
            sys.stdout.write(string)
            sys.stdout.flush()
        else:
            print(string)
        if save:
            self._logger_file.write("{:}\n".format(string))
            self._logger_file.flush()

    def close(self):
        self._logger_file.close()
        # This class does not create a writer itself; guard the attribute in case
        # a subclass attaches a tensorboard-style `writer`.
        writer = getattr(self, "writer", None)
        if writer is not None:
            writer.close()

    def __repr__(self):
        return "{name}(dir={log_dir}, prefix={_prefix}, log_time={_log_time})".format(
            name=self.__class__.__name__, **self.__dict__
        )
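A minimal usage sketch (not part of the committed file; the output directory and messages are illustrative):

from xautodl.xmisc import Logger

logger = Logger("./outputs", prefix="exp-")   # writes to ./outputs/logs/exp-<timestamp>.log
logger.log("start training")                  # printed to the console and saved to the file
logger.log("console only", save=False)
logger.close()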
163
AutoDL-Projects/xautodl/xmisc/meter_utils.py
Normal file
@@ -0,0 +1,163 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2020.06 #
#####################################################
# This file contains the meter classes, which may   #
# need to use PyTorch or NumPy.                     #
#####################################################
import abc
import torch
import torch.nn.functional as F


class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __repr__(self):
        return "{name}(val={val}, avg={avg}, count={count})".format(
            name=self.__class__.__name__, **self.__dict__
        )


class Metric(abc.ABC):
    """The default meta metric class."""

    def __init__(self):
        self.reset()

    def reset(self):
        raise NotImplementedError

    def __call__(self, predictions, targets):
        raise NotImplementedError

    def get_info(self):
        raise NotImplementedError

    def perf_str(self):
        raise NotImplementedError

    def __repr__(self):
        return "{name}({inner})".format(
            name=self.__class__.__name__, inner=self.inner_repr()
        )

    def inner_repr(self):
        return ""


class ComposeMetric(Metric):
    """The composed metric class."""

    def __init__(self, *metric_list):
        self.reset()
        for metric in metric_list:
            self.append(metric)

    def reset(self):
        self._metric_list = []

    def append(self, metric):
        if not isinstance(metric, Metric):
            raise ValueError(
                "The input metric is not correct: {:}".format(type(metric))
            )
        self._metric_list.append(metric)

    def __len__(self):
        return len(self._metric_list)

    def __call__(self, predictions, targets):
        results = list()
        for metric in self._metric_list:
            results.append(metric(predictions, targets))
        return results

    def get_info(self):
        results = dict()
        for metric in self._metric_list:
            for key, value in metric.get_info().items():
                results[key] = value
        return results

    def inner_repr(self):
        xlist = []
        for metric in self._metric_list:
            xlist.append(str(metric))
        return ",".join(xlist)


class CrossEntropyMetric(Metric):
    """The metric for the cross entropy loss."""

    def __init__(self, ignore_batch):
        super(CrossEntropyMetric, self).__init__()
        self._ignore_batch = ignore_batch

    def reset(self):
        self._loss = AverageMeter()

    def __call__(self, predictions, targets):
        if isinstance(predictions, torch.Tensor) and isinstance(targets, torch.Tensor):
            batch, _ = predictions.shape  # only support 2-D tensor
            if self._ignore_batch:
                loss = F.cross_entropy(predictions, targets, reduction="sum")
                self._loss.update(loss.item(), 1)
            else:
                loss = F.cross_entropy(predictions, targets, reduction="mean")
                self._loss.update(loss.item(), batch)
            return loss
        else:
            raise NotImplementedError

    def get_info(self):
        return {"loss": self._loss.avg, "score": self._loss.avg * 100}

    def perf_str(self):
        return "ce-loss={:.5f}".format(self._loss.avg)


class Top1AccMetric(Metric):
    """The metric for the top-1 accuracy."""

    def __init__(self, ignore_batch):
        super(Top1AccMetric, self).__init__()
        self._ignore_batch = ignore_batch

    def reset(self):
        self._accuracy = AverageMeter()

    def __call__(self, predictions, targets):
        if isinstance(predictions, torch.Tensor) and isinstance(targets, torch.Tensor):
            batch, _ = predictions.shape  # only support 2-D tensor
            max_prob_indexes = torch.argmax(predictions, dim=-1)
            corrects = torch.eq(max_prob_indexes, targets)
            accuracy = corrects.float().mean()
            if self._ignore_batch:
                self._accuracy.update(accuracy.item(), 1)
            else:
                self._accuracy.update(accuracy.item(), batch)
            return accuracy
        else:
            raise NotImplementedError

    def get_info(self):
        return {"accuracy": self._accuracy.avg, "score": self._accuracy.avg * 100}

    def perf_str(self):
        return "accuracy={:.3f}%".format(self._accuracy.avg * 100)
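A brief sketch of how these meters compose (not part of the committed file; the logits and labels are random placeholders). Note that both sub-metrics report a "score" key, so the later one wins in the merged get_info() dict:

import torch
from xautodl.xmisc.meter_utils import ComposeMetric, CrossEntropyMetric, Top1AccMetric

metric = ComposeMetric(CrossEntropyMetric(False), Top1AccMetric(False))
logits = torch.randn(8, 10)            # placeholder batch of predictions
labels = torch.randint(0, 10, (8,))    # placeholder targets
metric(logits, labels)
print(metric.get_info())               # merged dict with 'loss', 'accuracy', 'score'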
88
AutoDL-Projects/xautodl/xmisc/module_utils.py
Normal file
@@ -0,0 +1,88 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.01 #
#####################################################
from typing import Union, Dict, Text, Any
import importlib

from .yaml_utils import load_yaml

CLS_FUNC_KEY = "class_or_func"
KEYS = (CLS_FUNC_KEY, "module_path", "args", "kwargs")


def has_key_words(xdict):
    if not isinstance(xdict, dict):
        return False
    key_set = set(KEYS)
    cur_set = set(xdict.keys())
    return key_set.intersection(cur_set) == key_set


def get_module_by_module_path(module_path):
    """Load the module from a file path or an importable module name."""
    if module_path.endswith(".py"):
        module_spec = importlib.util.spec_from_file_location("", module_path)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
    else:
        module = importlib.import_module(module_path)

    return module


def call_by_dict(config: Dict[Text, Any], *args, **kwargs) -> object:
    """
    Get an initialized instance from a config.

    Parameters
    ----------
    config : a dictionary, such as:
        {
            'class_or_func': 'ClassName',
            'args': list,
            'kwargs': dict,
            'module_path': a string indicating the module path,
        }

    Returns
    -------
    object:
        An initialized object based on the config info.
    """
    module = get_module_by_module_path(config["module_path"])
    cls_or_func = getattr(module, config[CLS_FUNC_KEY])
    args = tuple(list(config["args"]) + list(args))
    kwargs = {**config["kwargs"], **kwargs}
    return cls_or_func(*args, **kwargs)


def call_by_yaml(path, *args, **kwargs) -> object:
    config = load_yaml(path)
    return call_by_dict(config, *args, **kwargs)


def nested_call_by_dict(config: Union[Dict[Text, Any], Any], *args, **kwargs) -> object:
    """Similar to `call_by_dict`, but the args may contain another dict that needs to be called."""
    if isinstance(config, list):
        return [nested_call_by_dict(x) for x in config]
    elif isinstance(config, tuple):
        return tuple(nested_call_by_dict(x) for x in config)
    elif not isinstance(config, dict):
        return config
    elif not has_key_words(config):
        return {key: nested_call_by_dict(x) for key, x in config.items()}
    else:
        module = get_module_by_module_path(config["module_path"])
        cls_or_func = getattr(module, config[CLS_FUNC_KEY])
        args = tuple(list(config["args"]) + list(args))
        kwargs = {**config["kwargs"], **kwargs}
        # Check whether there are nested special dicts that need to be called first.
        new_args = [nested_call_by_dict(x) for x in args]
        new_kwargs = {}
        for key, x in kwargs.items():
            new_kwargs[key] = nested_call_by_dict(x)
        return cls_or_func(*new_args, **new_kwargs)


def nested_call_by_yaml(path, *args, **kwargs) -> object:
    config = load_yaml(path)
    return nested_call_by_dict(config, *args, **kwargs)
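For illustration, a hedged sketch of a config that call_by_dict / nested_call_by_dict can consume (not part of the committed file; the target class, torch.nn.Linear, is just an example and nothing this module requires):

from xautodl.xmisc import nested_call_by_dict

config = {
    "class_or_func": "Linear",
    "module_path": "torch.nn",
    "args": [16, 10],
    "kwargs": {"bias": True},
}
layer = nested_call_by_dict(config)
print(layer)   # Linear(in_features=16, out_features=10, bias=True)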
32
AutoDL-Projects/xautodl/xmisc/sampler_utils.py
Normal file
@@ -0,0 +1,32 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import random


class BatchSampler:
    """A batch sampler used for single machine training."""

    def __init__(self, dataset, batch, steps):
        self._num_per_epoch = len(dataset)
        self._iter_per_epoch = self._num_per_epoch // batch
        self._steps = steps
        self._batch = batch
        if self._num_per_epoch < self._batch:
            raise ValueError(
                "The dataset size must be larger than batch={:}".format(batch)
            )
        self._indexes = list(range(self._num_per_epoch))

    def __iter__(self):
        """Yield a batch of indexes using random sampling."""
        for i in range(self._steps):
            if i % self._iter_per_epoch == 0:
                random.shuffle(self._indexes)
            j = i % self._iter_per_epoch
            yield self._indexes[j * self._batch : (j + 1) * self._batch]

    def __len__(self):
        return self._steps
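A usage sketch with a toy TensorDataset (not part of the committed file); the sampler yields a fixed number of batches (steps) regardless of the epoch length:

import torch
from torch.utils.data import DataLoader, TensorDataset
from xautodl.xmisc import BatchSampler

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 10, (100,)))
sampler = BatchSampler(dataset, batch=16, steps=50)
loader = DataLoader(dataset, batch_sampler=sampler)

for inputs, labels in loader:   # exactly 50 batches of 16 samples each
    pass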
532
AutoDL-Projects/xautodl/xmisc/scheduler_utils.py
Normal file
@@ -0,0 +1,532 @@
####################################################
# Copyright (c) Facebook, Inc. and its affiliates. #
####################################################
# Borrowed from https://github.com/facebookresearch/fvcore/blob/master/fvcore/common/param_scheduler.py
# and https://github.com/facebookresearch/detectron2/blob/master/detectron2/solver/lr_scheduler.py
####################################################
import torch

import bisect
import math
from typing import List, Optional, Sequence, Union

__all__ = [
    "ParamScheduler",
    "ConstantParamScheduler",
    "CosineParamScheduler",
    "ExponentialParamScheduler",
    "LinearParamScheduler",
    "CompositeParamScheduler",
    "MultiStepParamScheduler",
    "StepParamScheduler",
    "StepWithFixedGammaParamScheduler",
    "PolynomialDecayParamScheduler",
    "WarmupParamScheduler",
    "LRMultiplier",
]


class ParamScheduler:
    """
    Base class for parameter schedulers.
    A parameter scheduler defines a mapping from a progress value in [0, 1) to
    a number (e.g. learning rate).
    """

    # To be used for comparisons with where
    WHERE_EPSILON = 1e-6

    def __call__(self, where: float) -> float:
        """
        Get the value of the param for a given point at training.

        We update params (such as learning rate) based on the percent progress
        of training completed. This allows a scheduler to be agnostic to the
        exact length of a particular run (e.g. 120 epochs vs 90 epochs), as
        long as the relative progress where params should be updated is the same.
        However, it assumes that the total length of training is known.

        Args:
            where: A float in [0, 1) that represents how far training has progressed
        """
        raise NotImplementedError("Param schedulers must override __call__")


class ConstantParamScheduler(ParamScheduler):
    """
    Returns a constant value for a param.
    """

    def __init__(self, value: float) -> None:
        self._value = value

    def __call__(self, where: float) -> float:
        if where >= 1.0:
            raise RuntimeError(
                f"where in ParamScheduler must be in [0, 1): got {where}"
            )
        return self._value


class CosineParamScheduler(ParamScheduler):
    """
    Cosine decay or cosine warmup schedules based on start and end values.
    The schedule is updated based on the fraction of training progress.
    The schedule was proposed in 'SGDR: Stochastic Gradient Descent with
    Warm Restarts' (https://arxiv.org/abs/1608.03983). Note that this class
    only implements the cosine annealing part of SGDR, and not the restarts.

    Example:

        .. code-block:: python

            CosineParamScheduler(start_value=0.1, end_value=0.0001)
    """

    def __init__(
        self,
        start_value: float,
        end_value: float,
    ) -> None:
        self._start_value = start_value
        self._end_value = end_value

    def __call__(self, where: float) -> float:
        return self._end_value + 0.5 * (self._start_value - self._end_value) * (
            1 + math.cos(math.pi * where)
        )


class ExponentialParamScheduler(ParamScheduler):
    """
    Exponential schedule parameterized by a start value and decay.
    The schedule is updated based on the fraction of training
    progress, `where`, with the formula
    `param_t = start_value * (decay ** where)`.

    Example:

        .. code-block:: python

            ExponentialParamScheduler(start_value=2.0, decay=0.02)

    Corresponds to a decreasing schedule with values in [2.0, 0.04).
    """

    def __init__(
        self,
        start_value: float,
        decay: float,
    ) -> None:
        self._start_value = start_value
        self._decay = decay

    def __call__(self, where: float) -> float:
        return self._start_value * (self._decay**where)


class LinearParamScheduler(ParamScheduler):
    """
    Linearly interpolates parameter between ``start_value`` and ``end_value``.
    Can be used for either warmup or decay based on start and end values.
    The schedule is updated after every train step by default.

    Example:

        .. code-block:: python

            LinearParamScheduler(start_value=0.0001, end_value=0.01)

    Corresponds to a linear increasing schedule with values in [0.0001, 0.01).
    """

    def __init__(
        self,
        start_value: float,
        end_value: float,
    ) -> None:
        self._start_value = start_value
        self._end_value = end_value

    def __call__(self, where: float) -> float:
        # interpolate between start and end values
        return self._end_value * where + self._start_value * (1 - where)


class MultiStepParamScheduler(ParamScheduler):
    """
    Takes a predefined schedule for a param value, and a list of epochs or steps
    which stand for the upper boundary (excluded) of each range.

    Example:

        .. code-block:: python

            MultiStepParamScheduler(
                values=[0.1, 0.01, 0.001, 0.0001],
                milestones=[30, 60, 80, 120]
            )

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-79, and 0.0001 for epochs 80-119.
    Note that the length of values must be equal to the length of milestones
    plus one.
    """

    def __init__(
        self,
        values: List[float],
        num_updates: Optional[int] = None,
        milestones: Optional[List[int]] = None,
    ) -> None:
        """
        Args:
            values: param value in each range
            num_updates: the end of the last range. If None, will use ``milestones[-1]``
            milestones: the boundary of each range. If None, will evenly split ``num_updates``

        For example, all the following combinations define the same scheduler:

        * num_updates=90, milestones=[30, 60], values=[1, 0.1, 0.01]
        * num_updates=90, values=[1, 0.1, 0.01]
        * milestones=[30, 60, 90], values=[1, 0.1, 0.01]
        * milestones=[3, 6, 9], values=[1, 0.1, 0.01] (ParamScheduler is scale-invariant)
        """
        if num_updates is None and milestones is None:
            raise ValueError("num_updates and milestones cannot both be None")
        if milestones is None:
            # Default equispaced drop_epochs behavior
            milestones = []
            step_width = math.ceil(num_updates / float(len(values)))
            for idx in range(len(values) - 1):
                milestones.append(step_width * (idx + 1))
        else:
            if not (
                isinstance(milestones, Sequence)
                and len(milestones) == len(values) - int(num_updates is not None)
            ):
                raise ValueError(
                    "MultiStep scheduler requires a list of %d milestones"
                    % (len(values) - int(num_updates is not None))
                )

        if num_updates is None:
            num_updates, milestones = milestones[-1], milestones[:-1]
        if num_updates < len(values):
            raise ValueError(
                "Total num_updates must be greater than length of param schedule"
            )

        self._param_schedule = values
        self._num_updates = num_updates
        self._milestones: List[int] = milestones

        start_epoch = 0
        for milestone in self._milestones:
            # Do not exceed the total number of epochs
            if milestone >= self._num_updates:
                raise ValueError(
                    "Milestone must be smaller than total number of updates: "
                    "num_updates=%d, milestone=%d" % (self._num_updates, milestone)
                )
            # Must be in ascending order
            if start_epoch >= milestone:
                raise ValueError(
                    "Milestones must be in strictly increasing order: "
                    "start_epoch=%d, milestone=%d" % (start_epoch, milestone)
                )
            start_epoch = milestone

    def __call__(self, where: float) -> float:
        if where > 1.0:
            raise RuntimeError(
                f"where in ParamScheduler must be in [0, 1]: got {where}"
            )
        epoch_num = int((where + self.WHERE_EPSILON) * self._num_updates)
        return self._param_schedule[bisect.bisect_right(self._milestones, epoch_num)]


class PolynomialDecayParamScheduler(ParamScheduler):
    """
    Decays the param value after every epoch according to a
    polynomial function with a fixed power.
    The schedule is updated after every train step by default.

    Example:

        .. code-block:: python

            PolynomialDecayParamScheduler(base_value=0.1, power=0.9)

    Then the param value will be 0.1 for epoch 0, 0.099 for epoch 1, and
    so on.
    """

    def __init__(
        self,
        base_value: float,
        power: float,
    ) -> None:
        self._base_value = base_value
        self._power = power

    def __call__(self, where: float) -> float:
        return self._base_value * (1 - where) ** self._power


class StepParamScheduler(ParamScheduler):
    """
    Takes a fixed schedule for a param value. If the length of the
    fixed schedule is less than the number of epochs, then the epochs
    are divided evenly among the param schedule.
    The schedule is updated after every train epoch by default.

    Example:

        .. code-block:: python

            StepParamScheduler(values=[0.1, 0.01, 0.001, 0.0001], num_updates=120)

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-89, and 0.0001 for epochs 90-119.
    """

    def __init__(
        self,
        num_updates: Union[int, float],
        values: List[float],
    ) -> None:
        if num_updates <= 0:
            raise ValueError("Number of updates must be larger than 0")
        if not (isinstance(values, Sequence) and len(values) > 0):
            raise ValueError(
                "Step scheduler requires a list of at least one param value"
            )
        self._param_schedule = values

    def __call__(self, where: float) -> float:
        ind = int((where + self.WHERE_EPSILON) * len(self._param_schedule))
        return self._param_schedule[ind]


class StepWithFixedGammaParamScheduler(ParamScheduler):
    """
    Decays the param value by gamma at equal number of steps so as to have the
    specified total number of decays.

    Example:

        .. code-block:: python

            StepWithFixedGammaParamScheduler(
                base_value=0.1, gamma=0.1, num_decays=3, num_updates=120)

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-89, and 0.0001 for epochs 90-119.
    """

    def __init__(
        self,
        base_value: float,
        num_decays: int,
        gamma: float,
        num_updates: int,
    ) -> None:
        for k in [base_value, gamma]:
            if not (isinstance(k, (int, float)) and k > 0):
                raise ValueError("base_value and gamma must be positive numbers")
        for k in [num_decays, num_updates]:
            if not (isinstance(k, int) and k > 0):
                raise ValueError("num_decays and num_updates must be positive integers")

        self.base_value = base_value
        self.num_decays = num_decays
        self.gamma = gamma
        self.num_updates = num_updates
        values = [base_value]
        for _ in range(num_decays):
            values.append(values[-1] * gamma)

        self._step_param_scheduler = StepParamScheduler(
            num_updates=num_updates, values=values
        )

    def __call__(self, where: float) -> float:
        return self._step_param_scheduler(where)


class CompositeParamScheduler(ParamScheduler):
    """
    Composite parameter scheduler composed of intermediate schedulers.
    Takes a list of schedulers and a list of lengths corresponding to
    percentage of training each scheduler should run for. Schedulers
    are run in order. All values in lengths should sum to 1.0.

    Each scheduler also has a corresponding interval scale. If interval
    scale is 'fixed', the intermediate scheduler will be run without any rescaling
    of the time. If interval scale is 'rescaled', intermediate scheduler is
    run such that each scheduler will start and end at the same values as it
    would if it were the only scheduler. Default is 'rescaled' for all schedulers.

    Example:

        .. code-block:: python

            schedulers = [
                ConstantParamScheduler(value=0.42),
                CosineParamScheduler(start_value=0.42, end_value=1e-4)
            ]
            CompositeParamScheduler(
                schedulers=schedulers,
                interval_scaling=['rescaled', 'rescaled'],
                lengths=[0.3, 0.7])

    The parameter value will be 0.42 for the first [0%, 30%) of steps,
    and then will cosine decay from 0.42 to 0.0001 for [30%, 100%) of
    training.
    """

    def __init__(
        self,
        schedulers: Sequence[ParamScheduler],
        lengths: List[float],
        interval_scaling: Sequence[str],
    ) -> None:
        if len(schedulers) != len(lengths):
            raise ValueError("Schedulers and lengths must be same length")
        if len(schedulers) == 0:
            raise ValueError(
                "There must be at least one scheduler in the composite scheduler"
            )
        if abs(sum(lengths) - 1.0) >= 1e-3:
            raise ValueError("The sum of all values in lengths must be 1")
        if sum(lengths) != 1.0:
            lengths[-1] = 1.0 - sum(lengths[:-1])
        for s in interval_scaling:
            if s not in ["rescaled", "fixed"]:
                raise ValueError(f"Unsupported interval_scaling: {s}")

        self._lengths = lengths
        self._schedulers = schedulers
        self._interval_scaling = interval_scaling

    def __call__(self, where: float) -> float:
        # Find scheduler corresponding to where
        i = 0
        running_total = self._lengths[i]
        while (where + self.WHERE_EPSILON) > running_total and i < len(
            self._schedulers
        ) - 1:
            i += 1
            running_total += self._lengths[i]
        scheduler = self._schedulers[i]
        scheduler_where = where
        interval_scale = self._interval_scaling[i]
        if interval_scale == "rescaled":
            # Calculate corresponding where % for scheduler
            scheduler_start = running_total - self._lengths[i]
            scheduler_where = (where - scheduler_start) / self._lengths[i]
        return scheduler(scheduler_where)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
        """
        end_value = scheduler(warmup_length)  # the value to reach when warmup ends
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )


##### LR Scheduler


class LRMultiplier(torch.optim.lr_scheduler._LRScheduler):
    """
    A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.
    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:
    ::
        LRMultiplier(
            opt,
            WarmupParamScheduler(
                MultiStepParamScheduler(
                    [1, 0.1, 0.01],
                    milestones=[60000, 80000],
                    num_updates=90000,
                ), 0.001, 100 / 90000
            ),
            max_iter=90000
        )
    """

    # NOTES: in the most general case, every LR can use its own scheduler.
    # Supporting this requires interaction with the optimizer when its parameter
    # group is initialized. For example, classyvision implements its own optimizer
    # that allows different schedulers for every parameter group.
    # To avoid this complexity, we use this class to support the most common cases
    # where the relative scale among all LRs stays unchanged during training. In this
    # case we only need a total of one scheduler that defines the relative LR multiplier.

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: a fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "_LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless. Only keep pytorch scheduler states
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]
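To make the warmup behaviour concrete, the sketch below samples a WarmupParamScheduler directly (not part of the committed file; this mirrors what get_scheduler("warm-cos", ...) in __init__.py constructs). The returned values are relative multipliers, not absolute learning rates:

from xautodl.xmisc.scheduler_utils import CosineParamScheduler, WarmupParamScheduler

multiplier = WarmupParamScheduler(
    CosineParamScheduler(1.0, 0.001),   # cosine decay from 1.0 down to 0.001
    warmup_factor=0.001,
    warmup_length=0.05,                 # linear warmup over the first 5% of training
    warmup_method="linear",
)
for where in (0.0, 0.025, 0.05, 0.5, 0.99):
    print("{:.3f} -> {:.4f}".format(where, multiplier(where)))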
26
AutoDL-Projects/xautodl/xmisc/time_utils.py
Normal file
@@ -0,0 +1,26 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import time


def time_for_file():
    ISOTIMEFORMAT = "%d-%h-at-%H-%M-%S"
    return "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))


def time_string():
    ISOTIMEFORMAT = "%Y-%m-%d %X"
    string = "[{:}]".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
    return string


def convert_secs2time(epoch_time, return_str=False):
    need_hour = int(epoch_time / 3600)
    need_mins = int((epoch_time - 3600 * need_hour) / 60)
    need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins)
    if return_str:
        # Avoid shadowing the built-in `str`.
        xstr = "[{:02d}:{:02d}:{:02d}]".format(need_hour, need_mins, need_secs)
        return xstr
    else:
        return need_hour, need_mins, need_secs
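A small usage sketch (not part of the committed file):

from xautodl.xmisc.time_utils import convert_secs2time, time_string

print(time_string())                    # e.g. [2021-06-01 12:34:56]
print(convert_secs2time(3725, True))    # [01:02:05]
hours, mins, secs = convert_secs2time(3725)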
26
AutoDL-Projects/xautodl/xmisc/torch_utils.py
Normal file
@@ -0,0 +1,26 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import torch
import torch.nn as nn
import numpy as np


def count_parameters(model_or_parameters, unit="mb"):
    if isinstance(model_or_parameters, nn.Module):
        counts = sum(np.prod(v.size()) for v in model_or_parameters.parameters())
    elif isinstance(model_or_parameters, nn.Parameter):
        counts = model_or_parameters.numel()
    elif isinstance(model_or_parameters, (list, tuple)):
        counts = sum(count_parameters(x, None) for x in model_or_parameters)
    else:
        counts = sum(np.prod(v.size()) for v in model_or_parameters)
    if unit is None:
        return counts
    if unit.lower() == "kb" or unit.lower() == "k":
        counts /= 1e3
    elif unit.lower() == "mb" or unit.lower() == "m":
        counts /= 1e6
    elif unit.lower() == "gb" or unit.lower() == "g":
        counts /= 1e9
    else:
        raise ValueError("Unknown unit: {:}".format(unit))
    return counts
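A usage sketch on a throwaway model (not part of the committed file):

import torch.nn as nn
from xautodl.xmisc import count_parameters

model = nn.Sequential(nn.Linear(128, 256), nn.ReLU(), nn.Linear(256, 10))
print(count_parameters(model, "mb"))   # parameter count in millions
print(count_parameters(model, None))   # raw parameter count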
13
AutoDL-Projects/xautodl/xmisc/yaml_utils.py
Normal file
@@ -0,0 +1,13 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import os
import yaml


def load_yaml(path):
    if not os.path.isfile(path):
        raise ValueError("{:} is not a file.".format(path))
    with open(path, "r") as stream:
        data = yaml.safe_load(stream)
    return data
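A usage sketch (not part of the committed file; the path below is hypothetical):

from xautodl.xmisc import load_yaml

config = load_yaml("configs/example.yaml")   # any file that yaml.safe_load accepts
print(type(config))                          # typically dict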