add autodl
36
AutoDL-Projects/xautodl/xmisc/__init__.py
Normal file
@@ -0,0 +1,36 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
"""The module and yaml related functions."""
from .module_utils import call_by_dict
from .module_utils import call_by_yaml
from .module_utils import nested_call_by_dict
from .module_utils import nested_call_by_yaml
from .yaml_utils import load_yaml

from .torch_utils import count_parameters

from .logger_utils import Logger

"""The data sampler related classes."""
from .sampler_utils import BatchSampler

"""The meter related classes."""
from .meter_utils import AverageMeter

"""The scheduler related classes."""
from .scheduler_utils import CosineParamScheduler, WarmupParamScheduler, LRMultiplier


def get_scheduler(indicator, lr):
    if indicator == "warm-cos":
        multiplier = WarmupParamScheduler(
            CosineParamScheduler(lr, lr * 1e-3),
            warmup_factor=0.001,
            warmup_length=0.05,
            warmup_method="linear",
        )
    else:
        raise ValueError("Unknown indicator: {:}".format(indicator))
    return multiplier
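As a quick sanity check, here is a minimal usage sketch that wires get_scheduler into a training loop (not part of the committed file; the model, optimizer, and max_iter are placeholders for illustration):

import torch
from xautodl.xmisc import get_scheduler, LRMultiplier

model = torch.nn.Linear(16, 10)                          # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

multiplier = get_scheduler("warm-cos", lr=1.0)           # warmup followed by cosine decay
scheduler = LRMultiplier(optimizer, multiplier, max_iter=100)

for step in range(100):
    optimizer.step()
    scheduler.step()   # lr becomes base_lr * multiplier(progress)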
49
AutoDL-Projects/xautodl/xmisc/logger_utils.py
Normal file
@@ -0,0 +1,49 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import sys
from pathlib import Path

from .time_utils import time_for_file, time_string


class Logger:
    """A logger used in xautodl."""

    def __init__(self, root_dir, prefix="", log_time=True):
        """Create a text logger that writes to `root_dir/logs`."""
        self.root_dir = Path(root_dir)
        self.log_dir = self.root_dir / "logs"
        self.log_dir.mkdir(parents=True, exist_ok=True)

        self._prefix = prefix
        self._log_time = log_time
        self.logger_path = self.log_dir / "{:}{:}.log".format(
            self._prefix, time_for_file()
        )
        self._logger_file = open(self.logger_path, "w")

    @property
    def logger(self):
        return self._logger_file

    def log(self, string, save=True, stdout=False):
        string = "{:} {:}".format(time_string(), string) if self._log_time else string
        if stdout:
            # Write the raw string without a trailing newline.
            sys.stdout.write(string)
            sys.stdout.flush()
        else:
            print(string)
        if save:
            self._logger_file.write("{:}\n".format(string))
            self._logger_file.flush()

    def close(self):
        self._logger_file.close()
        # This class does not create a writer itself; guard the attribute in case
        # a subclass attaches a tensorboard-style `writer`.
        writer = getattr(self, "writer", None)
        if writer is not None:
            writer.close()

    def __repr__(self):
        return "{name}(dir={log_dir}, prefix={_prefix}, log_time={_log_time})".format(
            name=self.__class__.__name__, **self.__dict__
        )
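A minimal usage sketch (not part of the committed file; the output directory and messages are illustrative):

from xautodl.xmisc import Logger

logger = Logger("./outputs", prefix="exp-")   # writes to ./outputs/logs/exp-<timestamp>.log
logger.log("start training")                  # printed to the console and saved to the file
logger.log("console only", save=False)
logger.close()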
163
AutoDL-Projects/xautodl/xmisc/meter_utils.py
Normal file
@@ -0,0 +1,163 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2020.06 #
#####################################################
# This file contains the meter classes, which may   #
# need to use PyTorch or NumPy.                     #
#####################################################
import abc
import torch
import torch.nn.functional as F


class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __repr__(self):
        return "{name}(val={val}, avg={avg}, count={count})".format(
            name=self.__class__.__name__, **self.__dict__
        )


class Metric(abc.ABC):
    """The default meta metric class."""

    def __init__(self):
        self.reset()

    def reset(self):
        raise NotImplementedError

    def __call__(self, predictions, targets):
        raise NotImplementedError

    def get_info(self):
        raise NotImplementedError

    def perf_str(self):
        raise NotImplementedError

    def __repr__(self):
        return "{name}({inner})".format(
            name=self.__class__.__name__, inner=self.inner_repr()
        )

    def inner_repr(self):
        return ""


class ComposeMetric(Metric):
    """The composed metric class."""

    def __init__(self, *metric_list):
        self.reset()
        for metric in metric_list:
            self.append(metric)

    def reset(self):
        self._metric_list = []

    def append(self, metric):
        if not isinstance(metric, Metric):
            raise ValueError(
                "The input metric is not correct: {:}".format(type(metric))
            )
        self._metric_list.append(metric)

    def __len__(self):
        return len(self._metric_list)

    def __call__(self, predictions, targets):
        results = list()
        for metric in self._metric_list:
            results.append(metric(predictions, targets))
        return results

    def get_info(self):
        results = dict()
        for metric in self._metric_list:
            for key, value in metric.get_info().items():
                results[key] = value
        return results

    def inner_repr(self):
        xlist = []
        for metric in self._metric_list:
            xlist.append(str(metric))
        return ",".join(xlist)


class CrossEntropyMetric(Metric):
    """The metric for the cross entropy loss."""

    def __init__(self, ignore_batch):
        super(CrossEntropyMetric, self).__init__()
        self._ignore_batch = ignore_batch

    def reset(self):
        self._loss = AverageMeter()

    def __call__(self, predictions, targets):
        if isinstance(predictions, torch.Tensor) and isinstance(targets, torch.Tensor):
            batch, _ = predictions.shape  # only support 2-D tensor
            if self._ignore_batch:
                loss = F.cross_entropy(predictions, targets, reduction="sum")
                self._loss.update(loss.item(), 1)
            else:
                loss = F.cross_entropy(predictions, targets, reduction="mean")
                self._loss.update(loss.item(), batch)
            return loss
        else:
            raise NotImplementedError

    def get_info(self):
        return {"loss": self._loss.avg, "score": self._loss.avg * 100}

    def perf_str(self):
        return "ce-loss={:.5f}".format(self._loss.avg)


class Top1AccMetric(Metric):
    """The metric for the top-1 accuracy."""

    def __init__(self, ignore_batch):
        super(Top1AccMetric, self).__init__()
        self._ignore_batch = ignore_batch

    def reset(self):
        self._accuracy = AverageMeter()

    def __call__(self, predictions, targets):
        if isinstance(predictions, torch.Tensor) and isinstance(targets, torch.Tensor):
            batch, _ = predictions.shape  # only support 2-D tensor
            max_prob_indexes = torch.argmax(predictions, dim=-1)
            corrects = torch.eq(max_prob_indexes, targets)
            accuracy = corrects.float().mean()
            if self._ignore_batch:
                self._accuracy.update(accuracy.item(), 1)
            else:
                self._accuracy.update(accuracy.item(), batch)
            return accuracy
        else:
            raise NotImplementedError

    def get_info(self):
        return {"accuracy": self._accuracy.avg, "score": self._accuracy.avg * 100}

    def perf_str(self):
        return "accuracy={:.3f}%".format(self._accuracy.avg * 100)
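A brief sketch of how these meters compose (not part of the committed file; the logits and labels are random placeholders). Note that both sub-metrics report a "score" key, so the later one wins in the merged get_info() dict:

import torch
from xautodl.xmisc.meter_utils import ComposeMetric, CrossEntropyMetric, Top1AccMetric

metric = ComposeMetric(CrossEntropyMetric(False), Top1AccMetric(False))
logits = torch.randn(8, 10)            # placeholder batch of predictions
labels = torch.randint(0, 10, (8,))    # placeholder targets
metric(logits, labels)
print(metric.get_info())               # merged dict with 'loss', 'accuracy', 'score'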
88
AutoDL-Projects/xautodl/xmisc/module_utils.py
Normal file
@@ -0,0 +1,88 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.01 #
#####################################################
from typing import Union, Dict, Text, Any
import importlib

from .yaml_utils import load_yaml

CLS_FUNC_KEY = "class_or_func"
KEYS = (CLS_FUNC_KEY, "module_path", "args", "kwargs")


def has_key_words(xdict):
    if not isinstance(xdict, dict):
        return False
    key_set = set(KEYS)
    cur_set = set(xdict.keys())
    return key_set.intersection(cur_set) == key_set


def get_module_by_module_path(module_path):
    """Load the module from a file path or an importable module name."""
    if module_path.endswith(".py"):
        module_spec = importlib.util.spec_from_file_location("", module_path)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
    else:
        module = importlib.import_module(module_path)

    return module


def call_by_dict(config: Dict[Text, Any], *args, **kwargs) -> object:
    """
    Get an initialized instance from a config.

    Parameters
    ----------
    config : a dictionary, such as:
        {
            'class_or_func': 'ClassName',
            'args': list,
            'kwargs': dict,
            'module_path': a string indicating the module path,
        }

    Returns
    -------
    object:
        An initialized object based on the config info.
    """
    module = get_module_by_module_path(config["module_path"])
    cls_or_func = getattr(module, config[CLS_FUNC_KEY])
    args = tuple(list(config["args"]) + list(args))
    kwargs = {**config["kwargs"], **kwargs}
    return cls_or_func(*args, **kwargs)


def call_by_yaml(path, *args, **kwargs) -> object:
    config = load_yaml(path)
    return call_by_dict(config, *args, **kwargs)


def nested_call_by_dict(config: Union[Dict[Text, Any], Any], *args, **kwargs) -> object:
    """Similar to `call_by_dict`, but the args may contain another dict that needs to be called."""
    if isinstance(config, list):
        return [nested_call_by_dict(x) for x in config]
    elif isinstance(config, tuple):
        return tuple(nested_call_by_dict(x) for x in config)
    elif not isinstance(config, dict):
        return config
    elif not has_key_words(config):
        return {key: nested_call_by_dict(x) for key, x in config.items()}
    else:
        module = get_module_by_module_path(config["module_path"])
        cls_or_func = getattr(module, config[CLS_FUNC_KEY])
        args = tuple(list(config["args"]) + list(args))
        kwargs = {**config["kwargs"], **kwargs}
        # Check whether there are nested special dicts that need to be called first.
        new_args = [nested_call_by_dict(x) for x in args]
        new_kwargs = {}
        for key, x in kwargs.items():
            new_kwargs[key] = nested_call_by_dict(x)
        return cls_or_func(*new_args, **new_kwargs)


def nested_call_by_yaml(path, *args, **kwargs) -> object:
    config = load_yaml(path)
    return nested_call_by_dict(config, *args, **kwargs)
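For illustration, a hedged sketch of a config that call_by_dict / nested_call_by_dict can consume (not part of the committed file; the target class, torch.nn.Linear, is just an example and nothing this module requires):

from xautodl.xmisc import nested_call_by_dict

config = {
    "class_or_func": "Linear",
    "module_path": "torch.nn",
    "args": [16, 10],
    "kwargs": {"bias": True},
}
layer = nested_call_by_dict(config)
print(layer)   # Linear(in_features=16, out_features=10, bias=True)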
32
AutoDL-Projects/xautodl/xmisc/sampler_utils.py
Normal file
@@ -0,0 +1,32 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import random


class BatchSampler:
    """A batch sampler used for single machine training."""

    def __init__(self, dataset, batch, steps):
        self._num_per_epoch = len(dataset)
        self._iter_per_epoch = self._num_per_epoch // batch
        self._steps = steps
        self._batch = batch
        if self._num_per_epoch < self._batch:
            raise ValueError(
                "The dataset size must be larger than batch={:}".format(batch)
            )
        self._indexes = list(range(self._num_per_epoch))

    def __iter__(self):
        """Yield a batch of indexes using random sampling."""
        for i in range(self._steps):
            if i % self._iter_per_epoch == 0:
                random.shuffle(self._indexes)
            j = i % self._iter_per_epoch
            yield self._indexes[j * self._batch : (j + 1) * self._batch]

    def __len__(self):
        return self._steps
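A usage sketch with a toy TensorDataset (not part of the committed file); the sampler yields a fixed number of batches (steps) regardless of the epoch length:

import torch
from torch.utils.data import DataLoader, TensorDataset
from xautodl.xmisc import BatchSampler

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 10, (100,)))
sampler = BatchSampler(dataset, batch=16, steps=50)
loader = DataLoader(dataset, batch_sampler=sampler)

for inputs, labels in loader:   # exactly 50 batches of 16 samples each
    pass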
532
AutoDL-Projects/xautodl/xmisc/scheduler_utils.py
Normal file
@@ -0,0 +1,532 @@
####################################################
# Copyright (c) Facebook, Inc. and its affiliates. #
####################################################
# Borrowed from https://github.com/facebookresearch/fvcore/blob/master/fvcore/common/param_scheduler.py
# and https://github.com/facebookresearch/detectron2/blob/master/detectron2/solver/lr_scheduler.py
####################################################
import torch

import bisect
import math
from typing import List, Optional, Sequence, Union

__all__ = [
    "ParamScheduler",
    "ConstantParamScheduler",
    "CosineParamScheduler",
    "ExponentialParamScheduler",
    "LinearParamScheduler",
    "CompositeParamScheduler",
    "MultiStepParamScheduler",
    "StepParamScheduler",
    "StepWithFixedGammaParamScheduler",
    "PolynomialDecayParamScheduler",
    "WarmupParamScheduler",
    "LRMultiplier",
]


class ParamScheduler:
    """
    Base class for parameter schedulers.
    A parameter scheduler defines a mapping from a progress value in [0, 1) to
    a number (e.g. learning rate).
    """

    # To be used for comparisons with where
    WHERE_EPSILON = 1e-6

    def __call__(self, where: float) -> float:
        """
        Get the value of the param for a given point at training.

        We update params (such as learning rate) based on the percent progress
        of training completed. This allows a scheduler to be agnostic to the
        exact length of a particular run (e.g. 120 epochs vs 90 epochs), as
        long as the relative progress where params should be updated is the same.
        However, it assumes that the total length of training is known.

        Args:
            where: A float in [0, 1) that represents how far training has progressed
        """
        raise NotImplementedError("Param schedulers must override __call__")


class ConstantParamScheduler(ParamScheduler):
    """
    Returns a constant value for a param.
    """

    def __init__(self, value: float) -> None:
        self._value = value

    def __call__(self, where: float) -> float:
        if where >= 1.0:
            raise RuntimeError(
                f"where in ParamScheduler must be in [0, 1): got {where}"
            )
        return self._value


class CosineParamScheduler(ParamScheduler):
    """
    Cosine decay or cosine warmup schedules based on start and end values.
    The schedule is updated based on the fraction of training progress.
    The schedule was proposed in 'SGDR: Stochastic Gradient Descent with
    Warm Restarts' (https://arxiv.org/abs/1608.03983). Note that this class
    only implements the cosine annealing part of SGDR, and not the restarts.

    Example:

        .. code-block:: python

            CosineParamScheduler(start_value=0.1, end_value=0.0001)
    """

    def __init__(
        self,
        start_value: float,
        end_value: float,
    ) -> None:
        self._start_value = start_value
        self._end_value = end_value

    def __call__(self, where: float) -> float:
        return self._end_value + 0.5 * (self._start_value - self._end_value) * (
            1 + math.cos(math.pi * where)
        )


class ExponentialParamScheduler(ParamScheduler):
    """
    Exponential schedule parameterized by a start value and decay.
    The schedule is updated based on the fraction of training
    progress, `where`, with the formula
    `param_t = start_value * (decay ** where)`.

    Example:

        .. code-block:: python

            ExponentialParamScheduler(start_value=2.0, decay=0.02)

    Corresponds to a decreasing schedule with values in [2.0, 0.04).
    """

    def __init__(
        self,
        start_value: float,
        decay: float,
    ) -> None:
        self._start_value = start_value
        self._decay = decay

    def __call__(self, where: float) -> float:
        return self._start_value * (self._decay**where)


class LinearParamScheduler(ParamScheduler):
    """
    Linearly interpolates parameter between ``start_value`` and ``end_value``.
    Can be used for either warmup or decay based on start and end values.
    The schedule is updated after every train step by default.

    Example:

        .. code-block:: python

            LinearParamScheduler(start_value=0.0001, end_value=0.01)

    Corresponds to a linear increasing schedule with values in [0.0001, 0.01).
    """

    def __init__(
        self,
        start_value: float,
        end_value: float,
    ) -> None:
        self._start_value = start_value
        self._end_value = end_value

    def __call__(self, where: float) -> float:
        # interpolate between start and end values
        return self._end_value * where + self._start_value * (1 - where)


class MultiStepParamScheduler(ParamScheduler):
    """
    Takes a predefined schedule for a param value, and a list of epochs or steps
    which stand for the upper boundary (excluded) of each range.

    Example:

        .. code-block:: python

            MultiStepParamScheduler(
                values=[0.1, 0.01, 0.001, 0.0001],
                milestones=[30, 60, 80, 120]
            )

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-79, and 0.0001 for epochs 80-119.
    Note that the length of values must be equal to the length of milestones
    plus one.
    """

    def __init__(
        self,
        values: List[float],
        num_updates: Optional[int] = None,
        milestones: Optional[List[int]] = None,
    ) -> None:
        """
        Args:
            values: param value in each range
            num_updates: the end of the last range. If None, will use ``milestones[-1]``
            milestones: the boundary of each range. If None, will evenly split ``num_updates``

        For example, all the following combinations define the same scheduler:

        * num_updates=90, milestones=[30, 60], values=[1, 0.1, 0.01]
        * num_updates=90, values=[1, 0.1, 0.01]
        * milestones=[30, 60, 90], values=[1, 0.1, 0.01]
        * milestones=[3, 6, 9], values=[1, 0.1, 0.01] (ParamScheduler is scale-invariant)
        """
        if num_updates is None and milestones is None:
            raise ValueError("num_updates and milestones cannot both be None")
        if milestones is None:
            # Default equispaced drop_epochs behavior
            milestones = []
            step_width = math.ceil(num_updates / float(len(values)))
            for idx in range(len(values) - 1):
                milestones.append(step_width * (idx + 1))
        else:
            if not (
                isinstance(milestones, Sequence)
                and len(milestones) == len(values) - int(num_updates is not None)
            ):
                raise ValueError(
                    "MultiStep scheduler requires a list of %d milestones"
                    % (len(values) - int(num_updates is not None))
                )

        if num_updates is None:
            num_updates, milestones = milestones[-1], milestones[:-1]
        if num_updates < len(values):
            raise ValueError(
                "Total num_updates must be greater than length of param schedule"
            )

        self._param_schedule = values
        self._num_updates = num_updates
        self._milestones: List[int] = milestones

        start_epoch = 0
        for milestone in self._milestones:
            # Do not exceed the total number of epochs
            if milestone >= self._num_updates:
                raise ValueError(
                    "Milestone must be smaller than total number of updates: "
                    "num_updates=%d, milestone=%d" % (self._num_updates, milestone)
                )
            # Must be in ascending order
            if start_epoch >= milestone:
                raise ValueError(
                    "Milestones must be in strictly increasing order: "
                    "start_epoch=%d, milestone=%d" % (start_epoch, milestone)
                )
            start_epoch = milestone

    def __call__(self, where: float) -> float:
        if where > 1.0:
            raise RuntimeError(
                f"where in ParamScheduler must be in [0, 1]: got {where}"
            )
        epoch_num = int((where + self.WHERE_EPSILON) * self._num_updates)
        return self._param_schedule[bisect.bisect_right(self._milestones, epoch_num)]


class PolynomialDecayParamScheduler(ParamScheduler):
    """
    Decays the param value after every epoch according to a
    polynomial function with a fixed power.
    The schedule is updated after every train step by default.

    Example:

        .. code-block:: python

            PolynomialDecayParamScheduler(base_value=0.1, power=0.9)

    Then the param value will be 0.1 for epoch 0, 0.099 for epoch 1, and
    so on.
    """

    def __init__(
        self,
        base_value: float,
        power: float,
    ) -> None:
        self._base_value = base_value
        self._power = power

    def __call__(self, where: float) -> float:
        return self._base_value * (1 - where) ** self._power


class StepParamScheduler(ParamScheduler):
    """
    Takes a fixed schedule for a param value. If the length of the
    fixed schedule is less than the number of epochs, then the epochs
    are divided evenly among the param schedule.
    The schedule is updated after every train epoch by default.

    Example:

        .. code-block:: python

            StepParamScheduler(values=[0.1, 0.01, 0.001, 0.0001], num_updates=120)

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-89, and 0.0001 for epochs 90-119.
    """

    def __init__(
        self,
        num_updates: Union[int, float],
        values: List[float],
    ) -> None:
        if num_updates <= 0:
            raise ValueError("Number of updates must be larger than 0")
        if not (isinstance(values, Sequence) and len(values) > 0):
            raise ValueError(
                "Step scheduler requires a list of at least one param value"
            )
        self._param_schedule = values

    def __call__(self, where: float) -> float:
        ind = int((where + self.WHERE_EPSILON) * len(self._param_schedule))
        return self._param_schedule[ind]


class StepWithFixedGammaParamScheduler(ParamScheduler):
    """
    Decays the param value by gamma at equal number of steps so as to have the
    specified total number of decays.

    Example:

        .. code-block:: python

            StepWithFixedGammaParamScheduler(
                base_value=0.1, gamma=0.1, num_decays=3, num_updates=120)

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-89, and 0.0001 for epochs 90-119.
    """

    def __init__(
        self,
        base_value: float,
        num_decays: int,
        gamma: float,
        num_updates: int,
    ) -> None:
        for k in [base_value, gamma]:
            if not (isinstance(k, (int, float)) and k > 0):
                raise ValueError("base_value and gamma must be positive numbers")
        for k in [num_decays, num_updates]:
            if not (isinstance(k, int) and k > 0):
                raise ValueError("num_decays and num_updates must be positive integers")

        self.base_value = base_value
        self.num_decays = num_decays
        self.gamma = gamma
        self.num_updates = num_updates
        values = [base_value]
        for _ in range(num_decays):
            values.append(values[-1] * gamma)

        self._step_param_scheduler = StepParamScheduler(
            num_updates=num_updates, values=values
        )

    def __call__(self, where: float) -> float:
        return self._step_param_scheduler(where)


class CompositeParamScheduler(ParamScheduler):
    """
    Composite parameter scheduler composed of intermediate schedulers.
    Takes a list of schedulers and a list of lengths corresponding to
    percentage of training each scheduler should run for. Schedulers
    are run in order. All values in lengths should sum to 1.0.

    Each scheduler also has a corresponding interval scale. If interval
    scale is 'fixed', the intermediate scheduler will be run without any rescaling
    of the time. If interval scale is 'rescaled', intermediate scheduler is
    run such that each scheduler will start and end at the same values as it
    would if it were the only scheduler. Default is 'rescaled' for all schedulers.

    Example:

        .. code-block:: python

            schedulers = [
                ConstantParamScheduler(value=0.42),
                CosineParamScheduler(start_value=0.42, end_value=1e-4)
            ]
            CompositeParamScheduler(
                schedulers=schedulers,
                interval_scaling=['rescaled', 'rescaled'],
                lengths=[0.3, 0.7])

    The parameter value will be 0.42 for the first [0%, 30%) of steps,
    and then will cosine decay from 0.42 to 0.0001 for [30%, 100%) of
    training.
    """

    def __init__(
        self,
        schedulers: Sequence[ParamScheduler],
        lengths: List[float],
        interval_scaling: Sequence[str],
    ) -> None:
        if len(schedulers) != len(lengths):
            raise ValueError("Schedulers and lengths must be same length")
        if len(schedulers) == 0:
            raise ValueError(
                "There must be at least one scheduler in the composite scheduler"
            )
        if abs(sum(lengths) - 1.0) >= 1e-3:
            raise ValueError("The sum of all values in lengths must be 1")
        if sum(lengths) != 1.0:
            lengths[-1] = 1.0 - sum(lengths[:-1])
        for s in interval_scaling:
            if s not in ["rescaled", "fixed"]:
                raise ValueError(f"Unsupported interval_scaling: {s}")

        self._lengths = lengths
        self._schedulers = schedulers
        self._interval_scaling = interval_scaling

    def __call__(self, where: float) -> float:
        # Find scheduler corresponding to where
        i = 0
        running_total = self._lengths[i]
        while (where + self.WHERE_EPSILON) > running_total and i < len(
            self._schedulers
        ) - 1:
            i += 1
            running_total += self._lengths[i]
        scheduler = self._schedulers[i]
        scheduler_where = where
        interval_scale = self._interval_scaling[i]
        if interval_scale == "rescaled":
            # Calculate corresponding where % for scheduler
            scheduler_start = running_total - self._lengths[i]
            scheduler_where = (where - scheduler_start) / self._lengths[i]
        return scheduler(scheduler_where)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
        """
        end_value = scheduler(warmup_length)  # the value to reach when warmup ends
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )


##### LR Scheduler


class LRMultiplier(torch.optim.lr_scheduler._LRScheduler):
    """
    A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.
    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:
    ::
        LRMultiplier(
            opt,
            WarmupParamScheduler(
                MultiStepParamScheduler(
                    [1, 0.1, 0.01],
                    milestones=[60000, 80000],
                    num_updates=90000,
                ), 0.001, 100 / 90000
            ),
            max_iter=90000
        )
    """

    # NOTES: in the most general case, every LR can use its own scheduler.
    # Supporting this requires interaction with the optimizer when its parameter
    # group is initialized. For example, classyvision implements its own optimizer
    # that allows different schedulers for every parameter group.
    # To avoid this complexity, we use this class to support the most common cases
    # where the relative scale among all LRs stays unchanged during training. In this
    # case we only need a total of one scheduler that defines the relative LR multiplier.

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: a fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "_LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless. Only keep pytorch scheduler states
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]
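To make the warmup behaviour concrete, the sketch below samples a WarmupParamScheduler directly (not part of the committed file; this mirrors what get_scheduler("warm-cos", ...) in __init__.py constructs). The returned values are relative multipliers, not absolute learning rates:

from xautodl.xmisc.scheduler_utils import CosineParamScheduler, WarmupParamScheduler

multiplier = WarmupParamScheduler(
    CosineParamScheduler(1.0, 0.001),   # cosine decay from 1.0 down to 0.001
    warmup_factor=0.001,
    warmup_length=0.05,                 # linear warmup over the first 5% of training
    warmup_method="linear",
)
for where in (0.0, 0.025, 0.05, 0.5, 0.99):
    print("{:.3f} -> {:.4f}".format(where, multiplier(where)))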
26
AutoDL-Projects/xautodl/xmisc/time_utils.py
Normal file
@@ -0,0 +1,26 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import time


def time_for_file():
    ISOTIMEFORMAT = "%d-%h-at-%H-%M-%S"
    return "{:}".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))


def time_string():
    ISOTIMEFORMAT = "%Y-%m-%d %X"
    string = "[{:}]".format(time.strftime(ISOTIMEFORMAT, time.gmtime(time.time())))
    return string


def convert_secs2time(epoch_time, return_str=False):
    need_hour = int(epoch_time / 3600)
    need_mins = int((epoch_time - 3600 * need_hour) / 60)
    need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins)
    if return_str:
        # Avoid shadowing the built-in `str`.
        xstr = "[{:02d}:{:02d}:{:02d}]".format(need_hour, need_mins, need_secs)
        return xstr
    else:
        return need_hour, need_mins, need_secs
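A small usage sketch (not part of the committed file):

from xautodl.xmisc.time_utils import convert_secs2time, time_string

print(time_string())                    # e.g. [2021-06-01 12:34:56]
print(convert_secs2time(3725, True))    # [01:02:05]
hours, mins, secs = convert_secs2time(3725)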
26
AutoDL-Projects/xautodl/xmisc/torch_utils.py
Normal file
@@ -0,0 +1,26 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import torch
import torch.nn as nn
import numpy as np


def count_parameters(model_or_parameters, unit="mb"):
    if isinstance(model_or_parameters, nn.Module):
        counts = sum(np.prod(v.size()) for v in model_or_parameters.parameters())
    elif isinstance(model_or_parameters, nn.Parameter):
        counts = model_or_parameters.numel()
    elif isinstance(model_or_parameters, (list, tuple)):
        counts = sum(count_parameters(x, None) for x in model_or_parameters)
    else:
        counts = sum(np.prod(v.size()) for v in model_or_parameters)
    if unit is None:
        return counts
    if unit.lower() == "kb" or unit.lower() == "k":
        counts /= 1e3
    elif unit.lower() == "mb" or unit.lower() == "m":
        counts /= 1e6
    elif unit.lower() == "gb" or unit.lower() == "g":
        counts /= 1e9
    else:
        raise ValueError("Unknown unit: {:}".format(unit))
    return counts
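A usage sketch on a throwaway model (not part of the committed file):

import torch.nn as nn
from xautodl.xmisc import count_parameters

model = nn.Sequential(nn.Linear(128, 256), nn.ReLU(), nn.Linear(256, 10))
print(count_parameters(model, "mb"))   # parameter count in millions
print(count_parameters(model, None))   # raw parameter count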
13
AutoDL-Projects/xautodl/xmisc/yaml_utils.py
Normal file
@@ -0,0 +1,13 @@
#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.06 #
#####################################################
import os
import yaml


def load_yaml(path):
    if not os.path.isfile(path):
        raise ValueError("{:} is not a file.".format(path))
    with open(path, "r") as stream:
        data = yaml.safe_load(stream)
    return data
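A usage sketch (not part of the committed file; the path below is hypothetical):

from xautodl.xmisc import load_yaml

config = load_yaml("configs/example.yaml")   # any file that yaml.safe_load accepts
print(type(config))                          # typically dict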