Spaces:
Running on Zero
Running on Zero
Update
Browse files- README.md +2 -1
- app.py +13 -24
- pyproject.toml +13 -5
- requirements.txt +191 -85
- style.css +13 -37
- uv.lock +0 -0
README.md
CHANGED
|
@@ -4,7 +4,8 @@ emoji: 😻
|
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
|
|
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 6.10.0
|
| 8 |
+
python_version: "3.12.12"
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
---
|
app.py
CHANGED
|
@@ -12,6 +12,8 @@ from transformers import KyutaiSpeechToTextForConditionalGeneration, KyutaiSpeec
|
|
| 12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
model_id = "kyutai/stt-2.6b-en-trfs"
|
| 14 |
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=device, torch_dtype="auto")
|
|
|
|
|
|
|
| 15 |
processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
|
| 16 |
|
| 17 |
SAMPLE_RATE = 24000
|
|
@@ -37,11 +39,10 @@ def transcribe(audio_path: str) -> str:
|
|
| 37 |
data = data[:MAX_SAMPLE_SIZE]
|
| 38 |
gr.Info(f"Audio file is too long. Truncating to {MAX_DURATION} seconds.")
|
| 39 |
|
| 40 |
-
inputs = processor(data)
|
| 41 |
inputs.to(device)
|
| 42 |
output_tokens = model.generate(**inputs)
|
| 43 |
-
|
| 44 |
-
return output[0]
|
| 45 |
|
| 46 |
|
| 47 |
with gr.Blocks(fill_height=False) as demo:
|
|
@@ -61,7 +62,7 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 61 |
""") # noqa: RUF001
|
| 62 |
|
| 63 |
# Main content
|
| 64 |
-
with gr.
|
| 65 |
# Audio input
|
| 66 |
audio = gr.Audio(
|
| 67 |
label="🎵 Audio Input",
|
|
@@ -88,26 +89,14 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 88 |
)
|
| 89 |
|
| 90 |
# Examples section
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
# Footer
|
| 102 |
-
gr.HTML("""
|
| 103 |
-
<div class="footer-container">
|
| 104 |
-
<p>
|
| 105 |
-
Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" class="footer-link" target="_blank">anycoder</a> •
|
| 106 |
-
Powered by <a href="https://huggingface.co/kyutai/stt-2.6b-en-trfs" class="footer-link" target="_blank">Kyutai STT 2.6B</a>
|
| 107 |
-
</p>
|
| 108 |
-
</div>
|
| 109 |
-
""")
|
| 110 |
-
|
| 111 |
# Event handlers
|
| 112 |
transcribe_btn.click(
|
| 113 |
fn=transcribe,
|
|
|
|
| 12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
model_id = "kyutai/stt-2.6b-en-trfs"
|
| 14 |
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=device, torch_dtype="auto")
|
| 15 |
+
model.generation_config.disable_compile = True
|
| 16 |
+
model.generation_config.cache_implementation = "static"
|
| 17 |
processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
|
| 18 |
|
| 19 |
SAMPLE_RATE = 24000
|
|
|
|
| 39 |
data = data[:MAX_SAMPLE_SIZE]
|
| 40 |
gr.Info(f"Audio file is too long. Truncating to {MAX_DURATION} seconds.")
|
| 41 |
|
| 42 |
+
inputs = processor(audio=data)
|
| 43 |
inputs.to(device)
|
| 44 |
output_tokens = model.generate(**inputs)
|
| 45 |
+
return processor.decode(output_tokens, skip_special_tokens=True)[0]
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
with gr.Blocks(fill_height=False) as demo:
|
|
|
|
| 62 |
""") # noqa: RUF001
|
| 63 |
|
| 64 |
# Main content
|
| 65 |
+
with gr.Column(elem_classes="main-card"):
|
| 66 |
# Audio input
|
| 67 |
audio = gr.Audio(
|
| 68 |
label="🎵 Audio Input",
|
|
|
|
| 89 |
)
|
| 90 |
|
| 91 |
# Examples section
|
| 92 |
+
gr.Markdown("### 💡 Try These Examples", elem_classes="examples-container")
|
| 93 |
+
gr.Examples(
|
| 94 |
+
examples=sorted(pathlib.Path("assets").glob("*.wav")) if pathlib.Path("assets").exists() else [],
|
| 95 |
+
inputs=audio,
|
| 96 |
+
outputs=output,
|
| 97 |
+
fn=transcribe,
|
| 98 |
+
examples_per_page=5,
|
| 99 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# Event handlers
|
| 101 |
transcribe_btn.click(
|
| 102 |
fn=transcribe,
|
pyproject.toml
CHANGED
|
@@ -5,12 +5,20 @@ description = ""
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
| 8 |
-
"accelerate>=1.
|
| 9 |
-
"gradio[mcp]>=6.
|
| 10 |
"librosa>=0.11.0",
|
| 11 |
-
"spaces>=0.
|
| 12 |
-
"torch==2.
|
| 13 |
-
"transformers==4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
]
|
| 15 |
|
| 16 |
[tool.ruff]
|
|
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
| 8 |
+
"accelerate>=1.13.0",
|
| 9 |
+
"gradio[mcp]>=6.10.0",
|
| 10 |
"librosa>=0.11.0",
|
| 11 |
+
"spaces>=0.48.1",
|
| 12 |
+
"torch==2.9.1",
|
| 13 |
+
"transformers==5.4.0",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
[dependency-groups]
|
| 17 |
+
dev = [
|
| 18 |
+
"ruff>=0.15.8",
|
| 19 |
+
]
|
| 20 |
+
hf-spaces = [
|
| 21 |
+
"datasets",
|
| 22 |
]
|
| 23 |
|
| 24 |
[tool.ruff]
|
requirements.txt
CHANGED
|
@@ -1,29 +1,45 @@
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
-
# uv
|
| 3 |
-
accelerate==1.
|
| 4 |
-
# via kyutai-stt-2-6b-en
|
| 5 |
aiofiles==24.1.0
|
| 6 |
# via gradio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
annotated-doc==0.0.4
|
| 8 |
-
# via
|
|
|
|
|
|
|
| 9 |
annotated-types==0.7.0
|
| 10 |
# via pydantic
|
| 11 |
-
anyio==4.
|
| 12 |
# via
|
| 13 |
# gradio
|
| 14 |
# httpx
|
| 15 |
# mcp
|
| 16 |
# sse-starlette
|
| 17 |
# starlette
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
# via
|
|
|
|
| 20 |
# jsonschema
|
| 21 |
# referencing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
audioread==3.1.0
|
| 23 |
# via librosa
|
| 24 |
brotli==1.2.0
|
| 25 |
# via gradio
|
| 26 |
-
certifi==
|
| 27 |
# via
|
| 28 |
# httpcore
|
| 29 |
# httpx
|
|
@@ -32,60 +48,81 @@ cffi==2.0.0
|
|
| 32 |
# via
|
| 33 |
# cryptography
|
| 34 |
# soundfile
|
| 35 |
-
charset-normalizer==3.4.
|
| 36 |
# via requests
|
| 37 |
click==8.3.1
|
| 38 |
# via
|
| 39 |
# typer
|
| 40 |
# uvicorn
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# via pyjwt
|
|
|
|
| 43 |
decorator==5.2.1
|
| 44 |
# via librosa
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
# via anyio
|
| 47 |
-
fastapi==0.
|
| 48 |
# via gradio
|
| 49 |
ffmpy==1.0.0
|
| 50 |
# via gradio
|
| 51 |
-
filelock==3.
|
| 52 |
# via
|
|
|
|
| 53 |
# huggingface-hub
|
| 54 |
# torch
|
| 55 |
-
|
| 56 |
-
fsspec==2025.12.0
|
| 57 |
# via
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# gradio-client
|
| 59 |
# huggingface-hub
|
| 60 |
# torch
|
| 61 |
-
gradio==6.
|
| 62 |
# via
|
| 63 |
-
# kyutai-stt-2-6b-en
|
| 64 |
# spaces
|
| 65 |
-
gradio-client==2.
|
| 66 |
-
# via
|
|
|
|
|
|
|
| 67 |
groovy==0.1.2
|
| 68 |
# via gradio
|
| 69 |
h11==0.16.0
|
| 70 |
# via
|
| 71 |
# httpcore
|
| 72 |
# uvicorn
|
| 73 |
-
hf-
|
|
|
|
|
|
|
| 74 |
# via huggingface-hub
|
| 75 |
httpcore==1.0.9
|
| 76 |
# via httpx
|
| 77 |
httpx==0.28.1
|
| 78 |
# via
|
|
|
|
| 79 |
# gradio
|
| 80 |
# gradio-client
|
|
|
|
| 81 |
# mcp
|
| 82 |
# safehttpx
|
| 83 |
# spaces
|
| 84 |
httpx-sse==0.4.3
|
| 85 |
# via mcp
|
| 86 |
-
huggingface-hub==
|
| 87 |
# via
|
| 88 |
# accelerate
|
|
|
|
| 89 |
# gradio
|
| 90 |
# gradio-client
|
| 91 |
# tokenizers
|
|
@@ -95,23 +132,24 @@ idna==3.11
|
|
| 95 |
# anyio
|
| 96 |
# httpx
|
| 97 |
# requests
|
|
|
|
| 98 |
jinja2==3.1.6
|
| 99 |
# via
|
| 100 |
# gradio
|
| 101 |
# torch
|
| 102 |
-
joblib==1.5.
|
| 103 |
# via
|
| 104 |
# librosa
|
| 105 |
# scikit-learn
|
| 106 |
-
jsonschema==4.
|
| 107 |
# via mcp
|
| 108 |
jsonschema-specifications==2025.9.1
|
| 109 |
# via jsonschema
|
| 110 |
-
lazy-loader==0.
|
| 111 |
# via librosa
|
| 112 |
librosa==0.11.0
|
| 113 |
-
# via kyutai-stt-2-6b-en
|
| 114 |
-
llvmlite==0.
|
| 115 |
# via numba
|
| 116 |
markdown-it-py==4.0.0
|
| 117 |
# via rich
|
|
@@ -119,7 +157,7 @@ markupsafe==3.0.3
|
|
| 119 |
# via
|
| 120 |
# gradio
|
| 121 |
# jinja2
|
| 122 |
-
mcp==1.
|
| 123 |
# via gradio
|
| 124 |
mdurl==0.1.2
|
| 125 |
# via markdown-it-py
|
|
@@ -127,13 +165,35 @@ mpmath==1.3.0
|
|
| 127 |
# via sympy
|
| 128 |
msgpack==1.1.2
|
| 129 |
# via librosa
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
# via torch
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
# via librosa
|
| 134 |
-
numpy==2.2.6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
# via
|
| 136 |
# accelerate
|
|
|
|
| 137 |
# gradio
|
| 138 |
# librosa
|
| 139 |
# numba
|
|
@@ -143,48 +203,51 @@ numpy==2.2.6
|
|
| 143 |
# soundfile
|
| 144 |
# soxr
|
| 145 |
# transformers
|
| 146 |
-
nvidia-cublas-cu12==12.8.4.1
|
| 147 |
# via
|
| 148 |
# nvidia-cudnn-cu12
|
| 149 |
# nvidia-cusolver-cu12
|
| 150 |
# torch
|
| 151 |
-
nvidia-cuda-cupti-cu12==12.8.90
|
| 152 |
# via torch
|
| 153 |
-
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 154 |
# via torch
|
| 155 |
-
nvidia-cuda-runtime-cu12==12.8.90
|
| 156 |
# via torch
|
| 157 |
-
nvidia-cudnn-cu12==9.10.2.21
|
| 158 |
# via torch
|
| 159 |
-
nvidia-cufft-cu12==11.3.3.83
|
| 160 |
# via torch
|
| 161 |
-
nvidia-cufile-cu12==1.13.1.3
|
| 162 |
# via torch
|
| 163 |
-
nvidia-curand-cu12==10.3.9.90
|
| 164 |
# via torch
|
| 165 |
-
nvidia-cusolver-cu12==11.7.3.90
|
| 166 |
# via torch
|
| 167 |
-
nvidia-cusparse-cu12==12.5.8.93
|
| 168 |
# via
|
| 169 |
# nvidia-cusolver-cu12
|
| 170 |
# torch
|
| 171 |
-
nvidia-cusparselt-cu12==0.7.1
|
| 172 |
# via torch
|
| 173 |
-
nvidia-nccl-cu12==2.27.
|
| 174 |
# via torch
|
| 175 |
-
nvidia-nvjitlink-cu12==12.8.93
|
| 176 |
# via
|
| 177 |
# nvidia-cufft-cu12
|
| 178 |
# nvidia-cusolver-cu12
|
| 179 |
# nvidia-cusparse-cu12
|
| 180 |
# torch
|
| 181 |
-
nvidia-
|
|
|
|
|
|
|
| 182 |
# via torch
|
| 183 |
-
orjson==3.11.
|
| 184 |
# via gradio
|
| 185 |
-
packaging==
|
| 186 |
# via
|
| 187 |
# accelerate
|
|
|
|
| 188 |
# gradio
|
| 189 |
# gradio-client
|
| 190 |
# huggingface-hub
|
|
@@ -192,21 +255,33 @@ packaging==25.0
|
|
| 192 |
# pooch
|
| 193 |
# spaces
|
| 194 |
# transformers
|
| 195 |
-
pandas==2.3.3
|
| 196 |
-
# via
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
# via gradio
|
| 199 |
-
platformdirs==4.
|
| 200 |
# via pooch
|
| 201 |
-
pooch==1.
|
| 202 |
# via librosa
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
psutil==5.9.8
|
| 204 |
# via
|
| 205 |
# accelerate
|
| 206 |
# spaces
|
| 207 |
-
|
|
|
|
|
|
|
| 208 |
# via cffi
|
| 209 |
-
pydantic==2.12.
|
| 210 |
# via
|
| 211 |
# fastapi
|
| 212 |
# gradio
|
|
@@ -215,27 +290,32 @@ pydantic==2.12.4
|
|
| 215 |
# spaces
|
| 216 |
pydantic-core==2.41.5
|
| 217 |
# via pydantic
|
| 218 |
-
pydantic-settings==2.
|
| 219 |
# via mcp
|
| 220 |
pydub==0.25.1
|
| 221 |
# via gradio
|
| 222 |
-
pygments==2.
|
| 223 |
# via rich
|
| 224 |
-
pyjwt==2.
|
| 225 |
# via mcp
|
| 226 |
python-dateutil==2.9.0.post0
|
| 227 |
# via pandas
|
| 228 |
-
python-dotenv==1.2.
|
| 229 |
# via pydantic-settings
|
| 230 |
-
python-multipart==0.0.
|
| 231 |
# via
|
| 232 |
# gradio
|
| 233 |
# mcp
|
| 234 |
-
pytz==
|
| 235 |
-
# via
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
pyyaml==6.0.3
|
| 237 |
# via
|
| 238 |
# accelerate
|
|
|
|
| 239 |
# gradio
|
| 240 |
# huggingface-hub
|
| 241 |
# transformers
|
|
@@ -243,15 +323,14 @@ referencing==0.37.0
|
|
| 243 |
# via
|
| 244 |
# jsonschema
|
| 245 |
# jsonschema-specifications
|
| 246 |
-
regex==
|
| 247 |
# via transformers
|
| 248 |
-
requests==2.
|
| 249 |
# via
|
| 250 |
-
#
|
| 251 |
# pooch
|
| 252 |
# spaces
|
| 253 |
-
|
| 254 |
-
rich==14.2.0
|
| 255 |
# via typer
|
| 256 |
rpds-py==0.30.0
|
| 257 |
# via
|
|
@@ -263,16 +342,22 @@ safetensors==0.7.0
|
|
| 263 |
# via
|
| 264 |
# accelerate
|
| 265 |
# transformers
|
| 266 |
-
scikit-learn==1.7.2
|
| 267 |
# via librosa
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
# via
|
| 270 |
# librosa
|
| 271 |
# scikit-learn
|
| 272 |
semantic-version==2.10.0
|
| 273 |
# via gradio
|
| 274 |
-
setuptools==
|
| 275 |
-
# via
|
| 276 |
shellingham==1.5.4
|
| 277 |
# via typer
|
| 278 |
six==1.17.0
|
|
@@ -281,39 +366,54 @@ soundfile==0.13.1
|
|
| 281 |
# via librosa
|
| 282 |
soxr==1.0.0
|
| 283 |
# via librosa
|
| 284 |
-
|
| 285 |
-
# via kyutai-stt-2-6b-en (pyproject.toml)
|
| 286 |
-
sse-starlette==3.0.3
|
| 287 |
# via mcp
|
| 288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
# via
|
| 290 |
# fastapi
|
| 291 |
# gradio
|
| 292 |
# mcp
|
|
|
|
| 293 |
sympy==1.14.0
|
| 294 |
# via torch
|
| 295 |
threadpoolctl==3.6.0
|
| 296 |
# via scikit-learn
|
| 297 |
-
tokenizers==0.
|
| 298 |
# via transformers
|
| 299 |
tomlkit==0.13.3
|
| 300 |
# via gradio
|
| 301 |
-
torch==2.
|
| 302 |
# via
|
| 303 |
-
# kyutai-stt-2-6b-en (pyproject.toml)
|
| 304 |
# accelerate
|
| 305 |
-
|
|
|
|
| 306 |
# via
|
|
|
|
| 307 |
# huggingface-hub
|
| 308 |
# transformers
|
| 309 |
-
transformers==4.
|
| 310 |
-
# via kyutai-stt-2-6b-en
|
| 311 |
-
triton==3.
|
| 312 |
# via torch
|
| 313 |
-
typer==0.
|
| 314 |
-
# via
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
typing-extensions==4.15.0
|
| 316 |
# via
|
|
|
|
| 317 |
# anyio
|
| 318 |
# cryptography
|
| 319 |
# exceptiongroup
|
|
@@ -323,25 +423,31 @@ typing-extensions==4.15.0
|
|
| 323 |
# huggingface-hub
|
| 324 |
# librosa
|
| 325 |
# mcp
|
|
|
|
| 326 |
# pydantic
|
| 327 |
# pydantic-core
|
|
|
|
| 328 |
# referencing
|
| 329 |
# spaces
|
| 330 |
# starlette
|
| 331 |
# torch
|
| 332 |
-
# typer
|
| 333 |
# typing-inspection
|
| 334 |
# uvicorn
|
| 335 |
typing-inspection==0.4.2
|
| 336 |
# via
|
|
|
|
| 337 |
# mcp
|
| 338 |
# pydantic
|
| 339 |
# pydantic-settings
|
| 340 |
-
tzdata==2025.
|
| 341 |
# via pandas
|
| 342 |
-
urllib3==2.
|
| 343 |
# via requests
|
| 344 |
-
uvicorn==0.
|
| 345 |
# via
|
| 346 |
# gradio
|
| 347 |
# mcp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
|
| 3 |
+
accelerate==1.13.0
|
| 4 |
+
# via kyutai-stt-2-6b-en
|
| 5 |
aiofiles==24.1.0
|
| 6 |
# via gradio
|
| 7 |
+
aiohappyeyeballs==2.6.1
|
| 8 |
+
# via aiohttp
|
| 9 |
+
aiohttp==3.13.4
|
| 10 |
+
# via fsspec
|
| 11 |
+
aiosignal==1.4.0
|
| 12 |
+
# via aiohttp
|
| 13 |
annotated-doc==0.0.4
|
| 14 |
+
# via
|
| 15 |
+
# fastapi
|
| 16 |
+
# typer
|
| 17 |
annotated-types==0.7.0
|
| 18 |
# via pydantic
|
| 19 |
+
anyio==4.13.0
|
| 20 |
# via
|
| 21 |
# gradio
|
| 22 |
# httpx
|
| 23 |
# mcp
|
| 24 |
# sse-starlette
|
| 25 |
# starlette
|
| 26 |
+
async-timeout==5.0.1 ; python_full_version < '3.11'
|
| 27 |
+
# via aiohttp
|
| 28 |
+
attrs==26.1.0
|
| 29 |
# via
|
| 30 |
+
# aiohttp
|
| 31 |
# jsonschema
|
| 32 |
# referencing
|
| 33 |
+
audioop-lts==0.2.2 ; python_full_version >= '3.13'
|
| 34 |
+
# via
|
| 35 |
+
# gradio
|
| 36 |
+
# standard-aifc
|
| 37 |
+
# standard-sunau
|
| 38 |
audioread==3.1.0
|
| 39 |
# via librosa
|
| 40 |
brotli==1.2.0
|
| 41 |
# via gradio
|
| 42 |
+
certifi==2026.2.25
|
| 43 |
# via
|
| 44 |
# httpcore
|
| 45 |
# httpx
|
|
|
|
| 48 |
# via
|
| 49 |
# cryptography
|
| 50 |
# soundfile
|
| 51 |
+
charset-normalizer==3.4.6
|
| 52 |
# via requests
|
| 53 |
click==8.3.1
|
| 54 |
# via
|
| 55 |
# typer
|
| 56 |
# uvicorn
|
| 57 |
+
colorama==0.4.6 ; sys_platform == 'win32'
|
| 58 |
+
# via
|
| 59 |
+
# click
|
| 60 |
+
# tqdm
|
| 61 |
+
cryptography==46.0.6
|
| 62 |
# via pyjwt
|
| 63 |
+
datasets==4.8.4
|
| 64 |
decorator==5.2.1
|
| 65 |
# via librosa
|
| 66 |
+
dill==0.4.1
|
| 67 |
+
# via
|
| 68 |
+
# datasets
|
| 69 |
+
# multiprocess
|
| 70 |
+
exceptiongroup==1.3.1 ; python_full_version < '3.11'
|
| 71 |
# via anyio
|
| 72 |
+
fastapi==0.135.2
|
| 73 |
# via gradio
|
| 74 |
ffmpy==1.0.0
|
| 75 |
# via gradio
|
| 76 |
+
filelock==3.25.2
|
| 77 |
# via
|
| 78 |
+
# datasets
|
| 79 |
# huggingface-hub
|
| 80 |
# torch
|
| 81 |
+
frozenlist==1.8.0
|
|
|
|
| 82 |
# via
|
| 83 |
+
# aiohttp
|
| 84 |
+
# aiosignal
|
| 85 |
+
fsspec==2026.2.0
|
| 86 |
+
# via
|
| 87 |
+
# datasets
|
| 88 |
# gradio-client
|
| 89 |
# huggingface-hub
|
| 90 |
# torch
|
| 91 |
+
gradio==6.10.0
|
| 92 |
# via
|
| 93 |
+
# kyutai-stt-2-6b-en
|
| 94 |
# spaces
|
| 95 |
+
gradio-client==2.4.0
|
| 96 |
+
# via
|
| 97 |
+
# gradio
|
| 98 |
+
# hf-gradio
|
| 99 |
groovy==0.1.2
|
| 100 |
# via gradio
|
| 101 |
h11==0.16.0
|
| 102 |
# via
|
| 103 |
# httpcore
|
| 104 |
# uvicorn
|
| 105 |
+
hf-gradio==0.3.0
|
| 106 |
+
# via gradio
|
| 107 |
+
hf-xet==1.4.2 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
| 108 |
# via huggingface-hub
|
| 109 |
httpcore==1.0.9
|
| 110 |
# via httpx
|
| 111 |
httpx==0.28.1
|
| 112 |
# via
|
| 113 |
+
# datasets
|
| 114 |
# gradio
|
| 115 |
# gradio-client
|
| 116 |
+
# huggingface-hub
|
| 117 |
# mcp
|
| 118 |
# safehttpx
|
| 119 |
# spaces
|
| 120 |
httpx-sse==0.4.3
|
| 121 |
# via mcp
|
| 122 |
+
huggingface-hub==1.8.0
|
| 123 |
# via
|
| 124 |
# accelerate
|
| 125 |
+
# datasets
|
| 126 |
# gradio
|
| 127 |
# gradio-client
|
| 128 |
# tokenizers
|
|
|
|
| 132 |
# anyio
|
| 133 |
# httpx
|
| 134 |
# requests
|
| 135 |
+
# yarl
|
| 136 |
jinja2==3.1.6
|
| 137 |
# via
|
| 138 |
# gradio
|
| 139 |
# torch
|
| 140 |
+
joblib==1.5.3
|
| 141 |
# via
|
| 142 |
# librosa
|
| 143 |
# scikit-learn
|
| 144 |
+
jsonschema==4.26.0
|
| 145 |
# via mcp
|
| 146 |
jsonschema-specifications==2025.9.1
|
| 147 |
# via jsonschema
|
| 148 |
+
lazy-loader==0.5
|
| 149 |
# via librosa
|
| 150 |
librosa==0.11.0
|
| 151 |
+
# via kyutai-stt-2-6b-en
|
| 152 |
+
llvmlite==0.46.0
|
| 153 |
# via numba
|
| 154 |
markdown-it-py==4.0.0
|
| 155 |
# via rich
|
|
|
|
| 157 |
# via
|
| 158 |
# gradio
|
| 159 |
# jinja2
|
| 160 |
+
mcp==1.26.0
|
| 161 |
# via gradio
|
| 162 |
mdurl==0.1.2
|
| 163 |
# via markdown-it-py
|
|
|
|
| 165 |
# via sympy
|
| 166 |
msgpack==1.1.2
|
| 167 |
# via librosa
|
| 168 |
+
multidict==6.7.1
|
| 169 |
+
# via
|
| 170 |
+
# aiohttp
|
| 171 |
+
# yarl
|
| 172 |
+
multiprocess==0.70.19
|
| 173 |
+
# via datasets
|
| 174 |
+
networkx==3.4.2 ; python_full_version < '3.11'
|
| 175 |
# via torch
|
| 176 |
+
networkx==3.6.1 ; python_full_version >= '3.11'
|
| 177 |
+
# via torch
|
| 178 |
+
numba==0.64.0
|
| 179 |
# via librosa
|
| 180 |
+
numpy==2.2.6 ; python_full_version < '3.11'
|
| 181 |
+
# via
|
| 182 |
+
# accelerate
|
| 183 |
+
# datasets
|
| 184 |
+
# gradio
|
| 185 |
+
# librosa
|
| 186 |
+
# numba
|
| 187 |
+
# pandas
|
| 188 |
+
# scikit-learn
|
| 189 |
+
# scipy
|
| 190 |
+
# soundfile
|
| 191 |
+
# soxr
|
| 192 |
+
# transformers
|
| 193 |
+
numpy==2.4.4 ; python_full_version >= '3.11'
|
| 194 |
# via
|
| 195 |
# accelerate
|
| 196 |
+
# datasets
|
| 197 |
# gradio
|
| 198 |
# librosa
|
| 199 |
# numba
|
|
|
|
| 203 |
# soundfile
|
| 204 |
# soxr
|
| 205 |
# transformers
|
| 206 |
+
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 207 |
# via
|
| 208 |
# nvidia-cudnn-cu12
|
| 209 |
# nvidia-cusolver-cu12
|
| 210 |
# torch
|
| 211 |
+
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 212 |
# via torch
|
| 213 |
+
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 214 |
# via torch
|
| 215 |
+
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 216 |
# via torch
|
| 217 |
+
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 218 |
# via torch
|
| 219 |
+
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 220 |
# via torch
|
| 221 |
+
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 222 |
# via torch
|
| 223 |
+
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 224 |
# via torch
|
| 225 |
+
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 226 |
# via torch
|
| 227 |
+
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 228 |
# via
|
| 229 |
# nvidia-cusolver-cu12
|
| 230 |
# torch
|
| 231 |
+
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 232 |
# via torch
|
| 233 |
+
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 234 |
# via torch
|
| 235 |
+
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 236 |
# via
|
| 237 |
# nvidia-cufft-cu12
|
| 238 |
# nvidia-cusolver-cu12
|
| 239 |
# nvidia-cusparse-cu12
|
| 240 |
# torch
|
| 241 |
+
nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 242 |
+
# via torch
|
| 243 |
+
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 244 |
# via torch
|
| 245 |
+
orjson==3.11.7
|
| 246 |
# via gradio
|
| 247 |
+
packaging==26.0
|
| 248 |
# via
|
| 249 |
# accelerate
|
| 250 |
+
# datasets
|
| 251 |
# gradio
|
| 252 |
# gradio-client
|
| 253 |
# huggingface-hub
|
|
|
|
| 255 |
# pooch
|
| 256 |
# spaces
|
| 257 |
# transformers
|
| 258 |
+
pandas==2.3.3 ; python_full_version < '3.11'
|
| 259 |
+
# via
|
| 260 |
+
# datasets
|
| 261 |
+
# gradio
|
| 262 |
+
pandas==3.0.2 ; python_full_version >= '3.11'
|
| 263 |
+
# via
|
| 264 |
+
# datasets
|
| 265 |
+
# gradio
|
| 266 |
+
pillow==12.1.1
|
| 267 |
# via gradio
|
| 268 |
+
platformdirs==4.9.4
|
| 269 |
# via pooch
|
| 270 |
+
pooch==1.9.0
|
| 271 |
# via librosa
|
| 272 |
+
propcache==0.4.1
|
| 273 |
+
# via
|
| 274 |
+
# aiohttp
|
| 275 |
+
# yarl
|
| 276 |
psutil==5.9.8
|
| 277 |
# via
|
| 278 |
# accelerate
|
| 279 |
# spaces
|
| 280 |
+
pyarrow==23.0.1
|
| 281 |
+
# via datasets
|
| 282 |
+
pycparser==3.0 ; implementation_name != 'PyPy'
|
| 283 |
# via cffi
|
| 284 |
+
pydantic==2.12.5
|
| 285 |
# via
|
| 286 |
# fastapi
|
| 287 |
# gradio
|
|
|
|
| 290 |
# spaces
|
| 291 |
pydantic-core==2.41.5
|
| 292 |
# via pydantic
|
| 293 |
+
pydantic-settings==2.13.1
|
| 294 |
# via mcp
|
| 295 |
pydub==0.25.1
|
| 296 |
# via gradio
|
| 297 |
+
pygments==2.20.0
|
| 298 |
# via rich
|
| 299 |
+
pyjwt==2.12.1
|
| 300 |
# via mcp
|
| 301 |
python-dateutil==2.9.0.post0
|
| 302 |
# via pandas
|
| 303 |
+
python-dotenv==1.2.2
|
| 304 |
# via pydantic-settings
|
| 305 |
+
python-multipart==0.0.22
|
| 306 |
# via
|
| 307 |
# gradio
|
| 308 |
# mcp
|
| 309 |
+
pytz==2026.1.post1
|
| 310 |
+
# via
|
| 311 |
+
# gradio
|
| 312 |
+
# pandas
|
| 313 |
+
pywin32==311 ; sys_platform == 'win32'
|
| 314 |
+
# via mcp
|
| 315 |
pyyaml==6.0.3
|
| 316 |
# via
|
| 317 |
# accelerate
|
| 318 |
+
# datasets
|
| 319 |
# gradio
|
| 320 |
# huggingface-hub
|
| 321 |
# transformers
|
|
|
|
| 323 |
# via
|
| 324 |
# jsonschema
|
| 325 |
# jsonschema-specifications
|
| 326 |
+
regex==2026.3.32
|
| 327 |
# via transformers
|
| 328 |
+
requests==2.33.1
|
| 329 |
# via
|
| 330 |
+
# datasets
|
| 331 |
# pooch
|
| 332 |
# spaces
|
| 333 |
+
rich==14.3.3
|
|
|
|
| 334 |
# via typer
|
| 335 |
rpds-py==0.30.0
|
| 336 |
# via
|
|
|
|
| 342 |
# via
|
| 343 |
# accelerate
|
| 344 |
# transformers
|
| 345 |
+
scikit-learn==1.7.2 ; python_full_version < '3.11'
|
| 346 |
# via librosa
|
| 347 |
+
scikit-learn==1.8.0 ; python_full_version >= '3.11'
|
| 348 |
+
# via librosa
|
| 349 |
+
scipy==1.15.3 ; python_full_version < '3.11'
|
| 350 |
+
# via
|
| 351 |
+
# librosa
|
| 352 |
+
# scikit-learn
|
| 353 |
+
scipy==1.17.1 ; python_full_version >= '3.11'
|
| 354 |
# via
|
| 355 |
# librosa
|
| 356 |
# scikit-learn
|
| 357 |
semantic-version==2.10.0
|
| 358 |
# via gradio
|
| 359 |
+
setuptools==82.0.1 ; python_full_version >= '3.12'
|
| 360 |
+
# via torch
|
| 361 |
shellingham==1.5.4
|
| 362 |
# via typer
|
| 363 |
six==1.17.0
|
|
|
|
| 366 |
# via librosa
|
| 367 |
soxr==1.0.0
|
| 368 |
# via librosa
|
| 369 |
+
sse-starlette==3.3.4
|
|
|
|
|
|
|
| 370 |
# via mcp
|
| 371 |
+
standard-aifc==3.13.0 ; python_full_version >= '3.13'
|
| 372 |
+
# via
|
| 373 |
+
# audioread
|
| 374 |
+
# librosa
|
| 375 |
+
standard-chunk==3.13.0 ; python_full_version >= '3.13'
|
| 376 |
+
# via standard-aifc
|
| 377 |
+
standard-sunau==3.13.0 ; python_full_version >= '3.13'
|
| 378 |
+
# via
|
| 379 |
+
# audioread
|
| 380 |
+
# librosa
|
| 381 |
+
starlette==0.52.1
|
| 382 |
# via
|
| 383 |
# fastapi
|
| 384 |
# gradio
|
| 385 |
# mcp
|
| 386 |
+
# sse-starlette
|
| 387 |
sympy==1.14.0
|
| 388 |
# via torch
|
| 389 |
threadpoolctl==3.6.0
|
| 390 |
# via scikit-learn
|
| 391 |
+
tokenizers==0.22.2
|
| 392 |
# via transformers
|
| 393 |
tomlkit==0.13.3
|
| 394 |
# via gradio
|
| 395 |
+
torch==2.9.1
|
| 396 |
# via
|
|
|
|
| 397 |
# accelerate
|
| 398 |
+
# kyutai-stt-2-6b-en
|
| 399 |
+
tqdm==4.67.3
|
| 400 |
# via
|
| 401 |
+
# datasets
|
| 402 |
# huggingface-hub
|
| 403 |
# transformers
|
| 404 |
+
transformers==5.4.0
|
| 405 |
+
# via kyutai-stt-2-6b-en
|
| 406 |
+
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
| 407 |
# via torch
|
| 408 |
+
typer==0.24.1
|
| 409 |
+
# via
|
| 410 |
+
# gradio
|
| 411 |
+
# hf-gradio
|
| 412 |
+
# huggingface-hub
|
| 413 |
+
# transformers
|
| 414 |
typing-extensions==4.15.0
|
| 415 |
# via
|
| 416 |
+
# aiosignal
|
| 417 |
# anyio
|
| 418 |
# cryptography
|
| 419 |
# exceptiongroup
|
|
|
|
| 423 |
# huggingface-hub
|
| 424 |
# librosa
|
| 425 |
# mcp
|
| 426 |
+
# multidict
|
| 427 |
# pydantic
|
| 428 |
# pydantic-core
|
| 429 |
+
# pyjwt
|
| 430 |
# referencing
|
| 431 |
# spaces
|
| 432 |
# starlette
|
| 433 |
# torch
|
|
|
|
| 434 |
# typing-inspection
|
| 435 |
# uvicorn
|
| 436 |
typing-inspection==0.4.2
|
| 437 |
# via
|
| 438 |
+
# fastapi
|
| 439 |
# mcp
|
| 440 |
# pydantic
|
| 441 |
# pydantic-settings
|
| 442 |
+
tzdata==2025.3 ; python_full_version < '3.11' or sys_platform == 'emscripten' or sys_platform == 'win32'
|
| 443 |
# via pandas
|
| 444 |
+
urllib3==2.6.3
|
| 445 |
# via requests
|
| 446 |
+
uvicorn==0.42.0
|
| 447 |
# via
|
| 448 |
# gradio
|
| 449 |
# mcp
|
| 450 |
+
xxhash==3.6.0
|
| 451 |
+
# via datasets
|
| 452 |
+
yarl==1.23.0
|
| 453 |
+
# via aiohttp
|
style.css
CHANGED
|
@@ -28,12 +28,21 @@
|
|
| 28 |
font-weight: 400;
|
| 29 |
}
|
| 30 |
|
| 31 |
-
/*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
.main-card {
|
| 33 |
-
background: white;
|
| 34 |
border-radius: 1rem;
|
| 35 |
padding: 1.5rem;
|
| 36 |
-
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 37 |
margin-bottom: 1.5rem;
|
| 38 |
}
|
| 39 |
|
|
@@ -42,10 +51,9 @@
|
|
| 42 |
margin-bottom: 1.5rem;
|
| 43 |
}
|
| 44 |
|
| 45 |
-
/* Transcription output styling */
|
| 46 |
.transcription-output {
|
| 47 |
min-height: 120px;
|
| 48 |
-
background: #f8fafc;
|
| 49 |
border-radius: 0.75rem;
|
| 50 |
padding: 1rem;
|
| 51 |
font-size: 1rem;
|
|
@@ -57,27 +65,6 @@
|
|
| 57 |
margin-top: 2rem;
|
| 58 |
}
|
| 59 |
|
| 60 |
-
/* Footer */
|
| 61 |
-
.footer-container {
|
| 62 |
-
text-align: center;
|
| 63 |
-
padding: 1.5rem 1rem;
|
| 64 |
-
margin-top: 2rem;
|
| 65 |
-
border-top: 1px solid #e2e8f0;
|
| 66 |
-
font-size: 0.875rem;
|
| 67 |
-
color: #64748b;
|
| 68 |
-
}
|
| 69 |
-
|
| 70 |
-
.footer-link {
|
| 71 |
-
color: #667eea;
|
| 72 |
-
text-decoration: none;
|
| 73 |
-
font-weight: 500;
|
| 74 |
-
transition: color 0.2s;
|
| 75 |
-
}
|
| 76 |
-
|
| 77 |
-
.footer-link:hover {
|
| 78 |
-
color: #764ba2;
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
/* Mobile optimizations */
|
| 82 |
@media (max-width: 640px) {
|
| 83 |
.header-title {
|
|
@@ -111,14 +98,3 @@
|
|
| 111 |
transform: translateY(-2px) !important;
|
| 112 |
box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3) !important;
|
| 113 |
}
|
| 114 |
-
|
| 115 |
-
/* Info banner */
|
| 116 |
-
.info-banner {
|
| 117 |
-
background: #eff6ff;
|
| 118 |
-
border-left: 4px solid #3b82f6;
|
| 119 |
-
padding: 1rem;
|
| 120 |
-
border-radius: 0.5rem;
|
| 121 |
-
margin-bottom: 1.5rem;
|
| 122 |
-
font-size: 0.875rem;
|
| 123 |
-
color: #1e40af;
|
| 124 |
-
}
|
|
|
|
| 28 |
font-weight: 400;
|
| 29 |
}
|
| 30 |
|
| 31 |
+
/* Info banner */
|
| 32 |
+
.info-banner {
|
| 33 |
+
border-left: 4px solid #3b82f6;
|
| 34 |
+
padding: 1rem;
|
| 35 |
+
border-radius: 0.5rem;
|
| 36 |
+
margin-bottom: 1.5rem;
|
| 37 |
+
font-size: 0.875rem;
|
| 38 |
+
background: var(--background-fill-secondary);
|
| 39 |
+
color: var(--body-text-color);
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
/* Main content card — let gr.Group handle background */
|
| 43 |
.main-card {
|
|
|
|
| 44 |
border-radius: 1rem;
|
| 45 |
padding: 1.5rem;
|
|
|
|
| 46 |
margin-bottom: 1.5rem;
|
| 47 |
}
|
| 48 |
|
|
|
|
| 51 |
margin-bottom: 1.5rem;
|
| 52 |
}
|
| 53 |
|
| 54 |
+
/* Transcription output styling — no hardcoded background */
|
| 55 |
.transcription-output {
|
| 56 |
min-height: 120px;
|
|
|
|
| 57 |
border-radius: 0.75rem;
|
| 58 |
padding: 1rem;
|
| 59 |
font-size: 1rem;
|
|
|
|
| 65 |
margin-top: 2rem;
|
| 66 |
}
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
/* Mobile optimizations */
|
| 69 |
@media (max-width: 640px) {
|
| 70 |
.header-title {
|
|
|
|
| 98 |
transform: translateY(-2px) !important;
|
| 99 |
box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3) !important;
|
| 100 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|