Updates: share a single embed_dim across all SuperTransformer encoder layers (replaces the per-layer embed_dims list)
@@ -46,7 +46,7 @@ _default_max_depth = 5
 DefaultSearchSpace = dict(
     d_feat=6,
     stem_dim=spaces.Categorical(*_get_list_mul(8, 16)),
-    embed_dims=_get_mul_specs(_get_list_mul(8, 16), _default_max_depth),
+    embed_dim=spaces.Categorical(*_get_list_mul(8, 16)),
     num_heads=_get_mul_specs((1, 2, 4, 8), _default_max_depth),
     mlp_hidden_multipliers=_get_mul_specs((0.5, 1, 2, 4, 8), _default_max_depth),
     qkv_bias=True,
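Note on the hunk above: embed_dim becomes one categorical choice shared by every encoder layer instead of a per-layer list. A minimal sketch of what the new entry evaluates to, assuming _get_list_mul(base, max_mul) returns the multiples [base * i for i in range(1, max_mul + 1)] (the helper's body is not shown in this diff):

    # Assumed behavior of _get_list_mul -- not shown in this diff.
    def _get_list_mul(base, max_mul):
        return [base * i for i in range(1, max_mul + 1)]

    # The new entry is one categorical choice over 8, 16, ..., 128,
    # shared by every encoder layer:
    candidates = _get_list_mul(8, 16)
    # embed_dim = spaces.Categorical(*candidates)
    print(candidates[:3], candidates[-1])  # [8, 16, 24] 128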
@@ -62,7 +62,7 @@ class SuperTransformer(super_core.SuperModule):
         self,
         d_feat: int = 6,
         stem_dim: super_core.IntSpaceType = DefaultSearchSpace["stem_dim"],
-        embed_dims: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dims"],
+        embed_dim: super_core.IntSpaceType = DefaultSearchSpace["embed_dim"],
         num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"],
         mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[
             "mlp_hidden_multipliers"
@@ -73,7 +73,7 @@ class SuperTransformer(super_core.SuperModule):
         max_seq_len: int = 65,
     ):
         super(SuperTransformer, self).__init__()
-        self._embed_dims = embed_dims
+        self._embed_dim = embed_dim
         self._stem_dim = stem_dim
         self._num_heads = num_heads
         self._mlp_hidden_multipliers = mlp_hidden_multipliers
@@ -85,22 +85,15 @@ class SuperTransformer(super_core.SuperModule):
             d_model=stem_dim, max_seq_len=max_seq_len, dropout=pos_drop
         )
         # build the transformer encode layers -->> check params
-        _assert_types(embed_dims, (tuple, list))
         _assert_types(num_heads, (tuple, list))
         _assert_types(mlp_hidden_multipliers, (tuple, list))
-        num_layers = len(embed_dims)
-        assert (
-            num_layers == len(num_heads) == len(mlp_hidden_multipliers)
-        ), "{:} vs {:} vs {:}".format(
-            num_layers, len(num_heads), len(mlp_hidden_multipliers)
+        assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
+            len(num_heads), len(mlp_hidden_multipliers)
         )
         # build the transformer encode layers -->> backbone
-        layers, input_dim = [], stem_dim
-        for embed_dim, num_head, mlp_hidden_multiplier in zip(
-            embed_dims, num_heads, mlp_hidden_multipliers
-        ):
+        layers = []
+        for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers):
             layer = super_core.SuperTransformerEncoderLayer(
-                input_dim,
                 embed_dim,
                 num_head,
                 qkv_bias,
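With embed_dims gone, the backbone depth is implied by len(num_heads) alone, which is exactly the invariant the slimmed-down assert guards. A toy sketch of that invariant with stand-in tuples (no Super* classes involved):

    num_heads = (4, 4, 8)
    mlp_hidden_multipliers = (1, 2, 4)
    assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
        len(num_heads), len(mlp_hidden_multipliers)
    )
    # one (num_head, multiplier) pair per encoder layer -> depth 3
    layer_specs = list(zip(num_heads, mlp_hidden_multipliers))
    print(len(layer_specs))  # 3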
@@ -108,11 +101,12 @@ class SuperTransformer(super_core.SuperModule):
                 other_drop,
             )
             layers.append(layer)
-            input_dim = embed_dim
         self.backbone = super_core.SuperSequential(*layers)

         # the regression head
-        self.head = super_core.SuperLinear(self._embed_dims[-1], 1)
+        self.head = super_core.SuperSequential(
+            super_core.SuperLayerNorm1D(embed_dim), super_core.SuperLinear(embed_dim, 1)
+        )
         trunc_normal_(self.cls_token, std=0.02)
         self.apply(self._init_weights)

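The head change swaps a bare linear probe for LayerNorm followed by Linear. A hedged plain-PyTorch equivalent of the new head (the Super* classes are the search-space-aware counterparts of these modules; the width 64 is just an example):

    import torch
    import torch.nn as nn

    embed_dim = 64  # example width drawn from the categorical space
    head = nn.Sequential(nn.LayerNorm(embed_dim), nn.Linear(embed_dim, 1))
    out = head(torch.randn(2, embed_dim))  # -> shape (2, 1)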
@@ -123,14 +117,16 @@ class SuperTransformer(super_core.SuperModule):
     @property
     def abstract_search_space(self):
         root_node = spaces.VirtualNode(id(self))
+        if not spaces.is_determined(self._stem_dim):
+            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
+        if not spaces.is_determined(self._embed_dim):
+            root_node.append("_embed_dim", self._embed_dim.abstract(reuse_last=True))
         xdict = dict(
             input_embed=self.input_embed.abstract_search_space,
             pos_embed=self.pos_embed.abstract_search_space,
             backbone=self.backbone.abstract_search_space,
             head=self.head.abstract_search_space,
         )
-        if not spaces.is_determined(self._stem_dim):
-            root_node.append("_stem_dim", self._stem_dim.abstract(reuse_last=True))
         for key, space in xdict.items():
             if not spaces.is_determined(space):
                 root_node.append(key, space)
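The property now registers the two scalar dimensions first, then folds in the child modules' spaces, skipping anything already determined. A toy illustration of that skip-if-determined pattern (the real spaces.is_determined operates on space objects; here a plain list stands in for a still-searchable space and an int for a fixed one):

    def is_determined(space):  # stand-in for spaces.is_determined
        return not isinstance(space, (list, tuple))

    candidates = {"_stem_dim": [8, 16, 24], "_embed_dim": 64}
    root = {}
    for name, space in candidates.items():
        if not is_determined(space):
            root[name] = space
    print(root)  # {'_stem_dim': [8, 16, 24]} -- the fixed 64 is skipped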
@@ -196,7 +192,7 @@ def get_transformer(config):
     model = SuperTransformer(
         d_feat=config.get("d_feat"),
         stem_dim=config.get("stem_dim"),
-        embed_dims=config.get("embed_dims"),
+        embed_dim=config.get("embed_dim"),
         num_heads=config.get("num_heads"),
         mlp_hidden_multipliers=config.get("mlp_hidden_multipliers"),
         qkv_bias=config.get("qkv_bias"),
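Any config built for get_transformer must rename the key accordingly. A hedged sketch of a matching config dict (values are illustrative; only the keys visible in this hunk are shown, and the call assumes any further arguments in the truncated list take their defaults):

    config = dict(
        d_feat=6,
        stem_dim=32,
        embed_dim=64,  # renamed: was embed_dims, a per-layer list
        num_heads=(4, 4, 8),
        mlp_hidden_multipliers=(1, 2, 4),
        qkv_bias=True,
    )
    model = get_transformer(config)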