Upload 9 files
Browse files- README.md +94 -3
- adapter_config.json +44 -0
- adapter_model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scaler.pt +3 -0
- scheduler.pt +3 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
README.md
CHANGED
|
@@ -1,3 +1,94 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: openai/whisper-small
|
| 3 |
+
library_name: peft
|
| 4 |
+
tags:
|
| 5 |
+
- whisper
|
| 6 |
+
- asr
|
| 7 |
+
- uyghur
|
| 8 |
+
- lora
|
| 9 |
+
- peft
|
| 10 |
+
language:
|
| 11 |
+
- ug
|
| 12 |
+
datasets:
|
| 13 |
+
- mozilla-foundation/common_voice_11_0
|
| 14 |
+
license: apache-2.0
|
| 15 |
+
metrics:
|
| 16 |
+
- wer
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
# Whisper Small Uyghur LoRA (Fine-tuned)
|
| 20 |
+
|
| 21 |
+
## ئۇچۇر (Description in Uyghur)
|
| 22 |
+
بۇ مودېل `openai/whisper-small` ئاساسىدا ئۇيغۇرچە نۇتۇقنى تونۇش ئۈچۈن مەخسۇس تەربىيەلەنگەن. بىز LoRA تېخنىكىسىنى ئىشلىتىپ، ئۇيغۇرچە ئاۋازلارنى يۇقىرى ئېنىقلىقتا تېكىستكە ئايلاندۇرۇش مەقسىتىگە يەتتۇق.
|
| 23 |
+
|
| 24 |
+
- **تەربىيەلەش سانلىق مەلۇماتى:** Mozilla Common Voice (Uyghur)
|
| 25 |
+
- **قاتتىق دېتال:** NVIDIA GeForce RTX 3060 (9 سائەت تەربىيەلەنگەن)
|
| 26 |
+
- **مەقسەت:** ئۇيغۇر تىلىنىڭ رەقەملىك ساھەدىكى تەرەققىياتى ۋە تىلنى قوغداشقا تۆھپە قوشۇش.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## Model Description (English)
|
| 31 |
+
This model is a fine-tuned version of **OpenAI Whisper Small** for Uyghur Speech Recognition (ASR). It was trained using **LoRA (Low-Rank Adaptation)**, resulting in a lightweight but highly accurate adapter (approx. 13MB).
|
| 32 |
+
|
| 33 |
+
- **Data Source:** [Mozilla Common Voice / Data Collective](https://community.mozilladatacollective.com/)
|
| 34 |
+
- **Hardware:** Trained on a single **NVIDIA RTX 3060** GPU for approximately **9 hours**.
|
| 35 |
+
- **Accuracy:** Fine-tuned to achieve high precision in recognizing spoken Uyghur.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## ⚙️ Training Details
|
| 40 |
+
- **Base Model:** `openai/whisper-small`
|
| 41 |
+
- **Method:** PEFT (LoRA)
|
| 42 |
+
- **Training Time:** ~9 hours
|
| 43 |
+
- **Optimizer:** AdamW
|
| 44 |
+
- **Adapter Size:** ~13.5 MB
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## ⚠️ Disclaimer (ئاگاھلاندۇرۇش)
|
| 49 |
+
|
| 50 |
+
**English:** This model is released for research, educational, and language preservation purposes only. The developer strongly opposes the use of this technology for mass surveillance, human rights violations, or any form of discrimination.
|
| 51 |
+
|
| 52 |
+
**ئۇيغۇرچە:** بۇ مودېل پەقەت تەتقىقات، مائارىپ ۋە تىلنى قوغداش مەقسىتىدە ئېلان قىلىندى. بۇ تېخنىكىنى كۆزىتىش، كىشىلىك ھوقۇققا دەخلى-تەرۇز قىلىش ياكى كەمسىتىش خاراكتېرلىك ئىشلارغا ئىشلىتىشكە قەتئىي قارشى تۇرىمىز.
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## How to use
|
| 57 |
+
|
| 58 |
+
You can load this model using `PEFT` and `Transformers`. Since the processor is not included in this adapter-only repo, please load the processor from the base model.
|
| 59 |
+
|
| 60 |
+
```python
|
| 61 |
+
import torch
|
| 62 |
+
import librosa
|
| 63 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
| 64 |
+
from peft import PeftModel
|
| 65 |
+
|
| 66 |
+
# 1. Setup Model IDs
|
| 67 |
+
base_model_id = "openai/whisper-small"
|
| 68 |
+
peft_model_id = "xiwol/whisper-small-uyghur"
|
| 69 |
+
|
| 70 |
+
# 2. Load Processor from the base model
|
| 71 |
+
# Note: language/task set the decoder prompt for Uyghur ASR.
# NOTE(review): Whisper's predefined language list may not include Uyghur, so
# language="uyghur" can raise a ValueError — confirm; if it does, omit the
# language argument and rely on the fine-tuned adapter.
|
| 72 |
+
processor = WhisperProcessor.from_pretrained(base_model_id, language="uyghur", task="transcribe")
|
| 73 |
+
|
| 74 |
+
# 3. Load Base Model
|
| 75 |
+
base_model = WhisperForConditionalGeneration.from_pretrained(
|
| 76 |
+
base_model_id,
|
| 77 |
+
device_map="auto",
|
| 78 |
+
torch_dtype=torch.float16
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
# 4. Load the LoRA Adapter from Hugging Face
|
| 82 |
+
model = PeftModel.from_pretrained(base_model, peft_model_id)
|
| 83 |
+
model.eval()
|
| 84 |
+
|
| 85 |
+
# 5. Inference Example
|
| 86 |
+
# Load your audio file (ensure 16kHz sampling rate)
|
| 87 |
+
# audio, _ = librosa.load("your_audio_file.mp3", sr=16000)
|
| 88 |
+
# input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to("cuda").half()
|
| 89 |
+
|
| 90 |
+
# Generate Transcription
|
| 91 |
+
# with torch.no_grad():
|
| 92 |
+
# predicted_ids = model.generate(input_features)
|
| 93 |
+
# transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 94 |
+
# print(f"Transcription: {transcription}")
|
adapter_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alora_invocation_tokens": null,
|
| 3 |
+
"alpha_pattern": {},
|
| 4 |
+
"arrow_config": null,
|
| 5 |
+
"auto_mapping": {
|
| 6 |
+
"base_model_class": "WhisperForConditionalGeneration",
|
| 7 |
+
"parent_library": "transformers.models.whisper.modeling_whisper"
|
| 8 |
+
},
|
| 9 |
+
"base_model_name_or_path": "openai/whisper-small",
|
| 10 |
+
"bias": "none",
|
| 11 |
+
"corda_config": null,
|
| 12 |
+
"ensure_weight_tying": false,
|
| 13 |
+
"eva_config": null,
|
| 14 |
+
"exclude_modules": null,
|
| 15 |
+
"fan_in_fan_out": false,
|
| 16 |
+
"inference_mode": true,
|
| 17 |
+
"init_lora_weights": true,
|
| 18 |
+
"layer_replication": null,
|
| 19 |
+
"layers_pattern": null,
|
| 20 |
+
"layers_to_transform": null,
|
| 21 |
+
"loftq_config": {},
|
| 22 |
+
"lora_alpha": 64,
|
| 23 |
+
"lora_bias": false,
|
| 24 |
+
"lora_dropout": 0.05,
|
| 25 |
+
"megatron_config": null,
|
| 26 |
+
"megatron_core": "megatron.core",
|
| 27 |
+
"modules_to_save": null,
|
| 28 |
+
"peft_type": "LORA",
|
| 29 |
+
"peft_version": "0.18.0",
|
| 30 |
+
"qalora_group_size": 16,
|
| 31 |
+
"r": 32,
|
| 32 |
+
"rank_pattern": {},
|
| 33 |
+
"revision": null,
|
| 34 |
+
"target_modules": [
|
| 35 |
+
"v_proj",
|
| 36 |
+
"q_proj"
|
| 37 |
+
],
|
| 38 |
+
"target_parameters": null,
|
| 39 |
+
"task_type": null,
|
| 40 |
+
"trainable_token_indices": null,
|
| 41 |
+
"use_dora": false,
|
| 42 |
+
"use_qalora": false,
|
| 43 |
+
"use_rslora": false
|
| 44 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0cce8474c24115f1329c6ce93022f40c8aa84f87002d176dd9e8f003adf3037
|
| 3 |
+
size 14176064
|
optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e30db67afdf7c34b4feeb62ed33e5f592594b6fa899d7b9a84285eeb632b65c
|
| 3 |
+
size 4906682
|
rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:457bf6f19646bf1743939cefce9a5b0d5b49e96a6338d575bf53183fdea502f2
|
| 3 |
+
size 14244
|
scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84658d7b97b8473c1a84b0a3f3653f13be312a20606632621e9ddb7ba3dc9db7
|
| 3 |
+
size 988
|
scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d123dc2866296bea0f4743636e5cee7f4b45387f7f2e9dcf90078ff1c863039
|
| 3 |
+
size 1064
|
trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dbed90c84a6880a0fa13c58f65439bca66cc9920043d4e71a623fcb50155034
|
| 3 |
+
size 5496
|