marian-tedtalks-id-en-enhanced / model_config.json
dhintech's picture
Upload enhanced MarianMT Indonesian-English model with meeting domain adaptation
da2b0f2 verified
{
"model_name": "Enhanced MarianMT Meeting Translation ID-EN",
"base_model": "Helsinki-NLP/opus-mt-id-en",
"enhancement_date": "2025-05-28T12:42:35.765269",
"best_bleu_score": 11.746771868146594,
"baseline_bleu": 9.146153343607343,
"improvement": 2.60061852453925,
"training_epochs": 12,
"dataset_composition": {
"tedtalks_percentage": 0.8,
"meeting_domain_percentage": 0.1,
"total_samples": 69138
},
"specialization": "meeting_domain_adaptation",
"hyperparameters": {
"max_length": 128,
"batch_size": 6,
"learning_rate": 1e-05,
"weight_decay": 0.01,
"gradient_clip": 1.0,
"warmup_ratio": 0.15
},
"performance": {
"target_bleu": "> baseline",
"target_speed": "< 1.5s",
"achieved_bleu": 11.746771868146594,
"achieved_speed": 0.11984974145889282,
"bleu_achieved": true,
"speed_achieved": true
},
"enhancements": [
"domain_specific_meeting_data",
"tedtalks_large_dataset",
"enhanced_learning_rate",
"robust_evaluation",
"longer_max_length",
"meeting_vocabulary_adaptation"
]
}