hecto-ffnn-gru / config.json
ruhzi's picture
Upload folder using huggingface_hub
1106384 verified
raw
history blame contribute delete
935 Bytes
{
  "model_type": "distilbert",
  "architectures": [
    "Hecto"
  ],
  "hidden_size": 768,
  "num_labels": 4,
  "id2label": {
    "0": "World",
    "1": "Sports",
    "2": "Business",
    "3": "Sci/Tech"
  },
  "label2id": {
    "World": 0,
    "Sports": 1,
    "Business": 2,
    "Sci/Tech": 3
  },
  "moe_type": "heterogeneous",
  "experts": {
    "expert_0": {
      "type": "ffnn",
      "layers": [
        256,
        128
      ],
      "activation": "tanh"
    },
    "expert_1": {
      "type": "gru",
      "input_dim": 256,
      "hidden_dim": 128,
      "bidirectional": false
    }
  },
  "gating": {
    "type": "top1",
    "temperature": 1.5,
    "mlp_dims": [
      256,
      128,
      2
    ],
    "regularization": {
      "entropy_loss": true,
      "load_balancing": true
    }
  },
  "encoder": {
    "base_model": "distilbert-base-uncased",
    "freeze_encoder": false
  },
  "transformers_version": "4.41.1"
}