{ "model_type": "distilbert", "architectures": [ "Hecto" ], "hidden_size": 768, "num_labels": 4, "id2label": { "0": "World", "1": "Sports", "2": "Business", "3": "Sci/Tech" }, "label2id": { "World": 0, "Sports": 1, "Business": 2, "Sci/Tech": 3 }, "moe_type": "heterogeneous", "experts": { "expert_0": { "type": "ffnn", "layers": [ 256, 128 ], "activation": "tanh" }, "expert_1": { "type": "gru", "input_dim": 256, "hidden_dim": 128, "bidirectional": false } }, "gating": { "type": "top1", "temperature": 1.5, "mlp_dims": [ 256, 128, 2 ], "regularization": { "entropy_loss": true, "load_balancing": true } }, "encoder": { "base_model": "distilbert-base-uncased", "freeze_encoder": false }, "transformers_version": "4.41.1" }