hysts HF Staff committed on
Commit
a99051b
·
1 Parent(s): 6b12f2c
Files changed (6) hide show
  1. README.md +2 -1
  2. app.py +13 -24
  3. pyproject.toml +13 -5
  4. requirements.txt +191 -85
  5. style.css +13 -37
  6. uv.lock +0 -0
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 😻
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.0.2
 
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 6.10.0
8
+ python_version: "3.12.12"
9
  app_file: app.py
10
  pinned: false
11
  ---
app.py CHANGED
@@ -12,6 +12,8 @@ from transformers import KyutaiSpeechToTextForConditionalGeneration, KyutaiSpeec
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  model_id = "kyutai/stt-2.6b-en-trfs"
14
  model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=device, torch_dtype="auto")
 
 
15
  processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
16
 
17
  SAMPLE_RATE = 24000
@@ -37,11 +39,10 @@ def transcribe(audio_path: str) -> str:
37
  data = data[:MAX_SAMPLE_SIZE]
38
  gr.Info(f"Audio file is too long. Truncating to {MAX_DURATION} seconds.")
39
 
40
- inputs = processor(data)
41
  inputs.to(device)
42
  output_tokens = model.generate(**inputs)
43
- output = processor.batch_decode(output_tokens, skip_special_tokens=True)
44
- return output[0]
45
 
46
 
47
  with gr.Blocks(fill_height=False) as demo:
@@ -61,7 +62,7 @@ with gr.Blocks(fill_height=False) as demo:
61
  """) # noqa: RUF001
62
 
63
  # Main content
64
- with gr.Group(elem_classes="main-card"):
65
  # Audio input
66
  audio = gr.Audio(
67
  label="🎵 Audio Input",
@@ -88,26 +89,14 @@ with gr.Blocks(fill_height=False) as demo:
88
  )
89
 
90
  # Examples section
91
- with gr.Group(elem_classes="examples-container"):
92
- gr.Markdown("### 💡 Try These Examples")
93
- gr.Examples(
94
- examples=sorted(pathlib.Path("assets").glob("*.wav")) if pathlib.Path("assets").exists() else [],
95
- inputs=audio,
96
- outputs=output,
97
- fn=transcribe,
98
- examples_per_page=5,
99
- )
100
-
101
- # Footer
102
- gr.HTML("""
103
- <div class="footer-container">
104
- <p>
105
- Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" class="footer-link" target="_blank">anycoder</a> •
106
- Powered by <a href="https://huggingface.co/kyutai/stt-2.6b-en-trfs" class="footer-link" target="_blank">Kyutai STT 2.6B</a>
107
- </p>
108
- </div>
109
- """)
110
-
111
  # Event handlers
112
  transcribe_btn.click(
113
  fn=transcribe,
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  model_id = "kyutai/stt-2.6b-en-trfs"
14
  model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=device, torch_dtype="auto")
15
+ model.generation_config.disable_compile = True
16
+ model.generation_config.cache_implementation = "static"
17
  processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
18
 
19
  SAMPLE_RATE = 24000
 
39
  data = data[:MAX_SAMPLE_SIZE]
40
  gr.Info(f"Audio file is too long. Truncating to {MAX_DURATION} seconds.")
41
 
42
+ inputs = processor(audio=data)
43
  inputs.to(device)
44
  output_tokens = model.generate(**inputs)
45
+ return processor.decode(output_tokens, skip_special_tokens=True)[0]
 
46
 
47
 
48
  with gr.Blocks(fill_height=False) as demo:
 
62
  """) # noqa: RUF001
63
 
64
  # Main content
65
+ with gr.Column(elem_classes="main-card"):
66
  # Audio input
67
  audio = gr.Audio(
68
  label="🎵 Audio Input",
 
89
  )
90
 
91
  # Examples section
92
+ gr.Markdown("### 💡 Try These Examples", elem_classes="examples-container")
93
+ gr.Examples(
94
+ examples=sorted(pathlib.Path("assets").glob("*.wav")) if pathlib.Path("assets").exists() else [],
95
+ inputs=audio,
96
+ outputs=output,
97
+ fn=transcribe,
98
+ examples_per_page=5,
99
+ )
 
 
 
 
 
 
 
 
 
 
 
 
100
  # Event handlers
101
  transcribe_btn.click(
102
  fn=transcribe,
pyproject.toml CHANGED
@@ -5,12 +5,20 @@ description = ""
5
  readme = "README.md"
6
  requires-python = ">=3.10"
7
  dependencies = [
8
- "accelerate>=1.12.0",
9
- "gradio[mcp]>=6.0.2",
10
  "librosa>=0.11.0",
11
- "spaces>=0.44.0",
12
- "torch==2.8.0",
13
- "transformers==4.54.1",
 
 
 
 
 
 
 
 
14
  ]
15
 
16
  [tool.ruff]
 
5
  readme = "README.md"
6
  requires-python = ">=3.10"
7
  dependencies = [
8
+ "accelerate>=1.13.0",
9
+ "gradio[mcp]>=6.10.0",
10
  "librosa>=0.11.0",
11
+ "spaces>=0.48.1",
12
+ "torch==2.9.1",
13
+ "transformers==5.4.0",
14
+ ]
15
+
16
+ [dependency-groups]
17
+ dev = [
18
+ "ruff>=0.15.8",
19
+ ]
20
+ hf-spaces = [
21
+ "datasets",
22
  ]
23
 
24
  [tool.ruff]
requirements.txt CHANGED
@@ -1,29 +1,45 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml -o requirements.txt
3
- accelerate==1.12.0
4
- # via kyutai-stt-2-6b-en (pyproject.toml)
5
  aiofiles==24.1.0
6
  # via gradio
 
 
 
 
 
 
7
  annotated-doc==0.0.4
8
- # via fastapi
 
 
9
  annotated-types==0.7.0
10
  # via pydantic
11
- anyio==4.12.0
12
  # via
13
  # gradio
14
  # httpx
15
  # mcp
16
  # sse-starlette
17
  # starlette
18
- attrs==25.4.0
 
 
19
  # via
 
20
  # jsonschema
21
  # referencing
 
 
 
 
 
22
  audioread==3.1.0
23
  # via librosa
24
  brotli==1.2.0
25
  # via gradio
26
- certifi==2025.11.12
27
  # via
28
  # httpcore
29
  # httpx
@@ -32,60 +48,81 @@ cffi==2.0.0
32
  # via
33
  # cryptography
34
  # soundfile
35
- charset-normalizer==3.4.4
36
  # via requests
37
  click==8.3.1
38
  # via
39
  # typer
40
  # uvicorn
41
- cryptography==46.0.3
 
 
 
 
42
  # via pyjwt
 
43
  decorator==5.2.1
44
  # via librosa
45
- exceptiongroup==1.3.1
 
 
 
 
46
  # via anyio
47
- fastapi==0.123.7
48
  # via gradio
49
  ffmpy==1.0.0
50
  # via gradio
51
- filelock==3.20.0
52
  # via
 
53
  # huggingface-hub
54
  # torch
55
- # transformers
56
- fsspec==2025.12.0
57
  # via
 
 
 
 
 
58
  # gradio-client
59
  # huggingface-hub
60
  # torch
61
- gradio==6.0.2
62
  # via
63
- # kyutai-stt-2-6b-en (pyproject.toml)
64
  # spaces
65
- gradio-client==2.0.1
66
- # via gradio
 
 
67
  groovy==0.1.2
68
  # via gradio
69
  h11==0.16.0
70
  # via
71
  # httpcore
72
  # uvicorn
73
- hf-xet==1.2.0
 
 
74
  # via huggingface-hub
75
  httpcore==1.0.9
76
  # via httpx
77
  httpx==0.28.1
78
  # via
 
79
  # gradio
80
  # gradio-client
 
81
  # mcp
82
  # safehttpx
83
  # spaces
84
  httpx-sse==0.4.3
85
  # via mcp
86
- huggingface-hub==0.36.0
87
  # via
88
  # accelerate
 
89
  # gradio
90
  # gradio-client
91
  # tokenizers
@@ -95,23 +132,24 @@ idna==3.11
95
  # anyio
96
  # httpx
97
  # requests
 
98
  jinja2==3.1.6
99
  # via
100
  # gradio
101
  # torch
102
- joblib==1.5.2
103
  # via
104
  # librosa
105
  # scikit-learn
106
- jsonschema==4.25.1
107
  # via mcp
108
  jsonschema-specifications==2025.9.1
109
  # via jsonschema
110
- lazy-loader==0.4
111
  # via librosa
112
  librosa==0.11.0
113
- # via kyutai-stt-2-6b-en (pyproject.toml)
114
- llvmlite==0.45.1
115
  # via numba
116
  markdown-it-py==4.0.0
117
  # via rich
@@ -119,7 +157,7 @@ markupsafe==3.0.3
119
  # via
120
  # gradio
121
  # jinja2
122
- mcp==1.23.1
123
  # via gradio
124
  mdurl==0.1.2
125
  # via markdown-it-py
@@ -127,13 +165,35 @@ mpmath==1.3.0
127
  # via sympy
128
  msgpack==1.1.2
129
  # via librosa
130
- networkx==3.4.2
 
 
 
 
 
 
131
  # via torch
132
- numba==0.62.1
 
 
133
  # via librosa
134
- numpy==2.2.6
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # via
136
  # accelerate
 
137
  # gradio
138
  # librosa
139
  # numba
@@ -143,48 +203,51 @@ numpy==2.2.6
143
  # soundfile
144
  # soxr
145
  # transformers
146
- nvidia-cublas-cu12==12.8.4.1
147
  # via
148
  # nvidia-cudnn-cu12
149
  # nvidia-cusolver-cu12
150
  # torch
151
- nvidia-cuda-cupti-cu12==12.8.90
152
  # via torch
153
- nvidia-cuda-nvrtc-cu12==12.8.93
154
  # via torch
155
- nvidia-cuda-runtime-cu12==12.8.90
156
  # via torch
157
- nvidia-cudnn-cu12==9.10.2.21
158
  # via torch
159
- nvidia-cufft-cu12==11.3.3.83
160
  # via torch
161
- nvidia-cufile-cu12==1.13.1.3
162
  # via torch
163
- nvidia-curand-cu12==10.3.9.90
164
  # via torch
165
- nvidia-cusolver-cu12==11.7.3.90
166
  # via torch
167
- nvidia-cusparse-cu12==12.5.8.93
168
  # via
169
  # nvidia-cusolver-cu12
170
  # torch
171
- nvidia-cusparselt-cu12==0.7.1
172
  # via torch
173
- nvidia-nccl-cu12==2.27.3
174
  # via torch
175
- nvidia-nvjitlink-cu12==12.8.93
176
  # via
177
  # nvidia-cufft-cu12
178
  # nvidia-cusolver-cu12
179
  # nvidia-cusparse-cu12
180
  # torch
181
- nvidia-nvtx-cu12==12.8.90
 
 
182
  # via torch
183
- orjson==3.11.4
184
  # via gradio
185
- packaging==25.0
186
  # via
187
  # accelerate
 
188
  # gradio
189
  # gradio-client
190
  # huggingface-hub
@@ -192,21 +255,33 @@ packaging==25.0
192
  # pooch
193
  # spaces
194
  # transformers
195
- pandas==2.3.3
196
- # via gradio
197
- pillow==12.0.0
 
 
 
 
 
 
198
  # via gradio
199
- platformdirs==4.5.0
200
  # via pooch
201
- pooch==1.8.2
202
  # via librosa
 
 
 
 
203
  psutil==5.9.8
204
  # via
205
  # accelerate
206
  # spaces
207
- pycparser==2.23
 
 
208
  # via cffi
209
- pydantic==2.12.4
210
  # via
211
  # fastapi
212
  # gradio
@@ -215,27 +290,32 @@ pydantic==2.12.4
215
  # spaces
216
  pydantic-core==2.41.5
217
  # via pydantic
218
- pydantic-settings==2.12.0
219
  # via mcp
220
  pydub==0.25.1
221
  # via gradio
222
- pygments==2.19.2
223
  # via rich
224
- pyjwt==2.10.1
225
  # via mcp
226
  python-dateutil==2.9.0.post0
227
  # via pandas
228
- python-dotenv==1.2.1
229
  # via pydantic-settings
230
- python-multipart==0.0.20
231
  # via
232
  # gradio
233
  # mcp
234
- pytz==2025.2
235
- # via pandas
 
 
 
 
236
  pyyaml==6.0.3
237
  # via
238
  # accelerate
 
239
  # gradio
240
  # huggingface-hub
241
  # transformers
@@ -243,15 +323,14 @@ referencing==0.37.0
243
  # via
244
  # jsonschema
245
  # jsonschema-specifications
246
- regex==2025.11.3
247
  # via transformers
248
- requests==2.32.5
249
  # via
250
- # huggingface-hub
251
  # pooch
252
  # spaces
253
- # transformers
254
- rich==14.2.0
255
  # via typer
256
  rpds-py==0.30.0
257
  # via
@@ -263,16 +342,22 @@ safetensors==0.7.0
263
  # via
264
  # accelerate
265
  # transformers
266
- scikit-learn==1.7.2
267
  # via librosa
268
- scipy==1.15.3
 
 
 
 
 
 
269
  # via
270
  # librosa
271
  # scikit-learn
272
  semantic-version==2.10.0
273
  # via gradio
274
- setuptools==80.9.0
275
- # via triton
276
  shellingham==1.5.4
277
  # via typer
278
  six==1.17.0
@@ -281,39 +366,54 @@ soundfile==0.13.1
281
  # via librosa
282
  soxr==1.0.0
283
  # via librosa
284
- spaces==0.44.0
285
- # via kyutai-stt-2-6b-en (pyproject.toml)
286
- sse-starlette==3.0.3
287
  # via mcp
288
- starlette==0.50.0
 
 
 
 
 
 
 
 
 
 
289
  # via
290
  # fastapi
291
  # gradio
292
  # mcp
 
293
  sympy==1.14.0
294
  # via torch
295
  threadpoolctl==3.6.0
296
  # via scikit-learn
297
- tokenizers==0.21.4
298
  # via transformers
299
  tomlkit==0.13.3
300
  # via gradio
301
- torch==2.8.0
302
  # via
303
- # kyutai-stt-2-6b-en (pyproject.toml)
304
  # accelerate
305
- tqdm==4.67.1
 
306
  # via
 
307
  # huggingface-hub
308
  # transformers
309
- transformers==4.54.1
310
- # via kyutai-stt-2-6b-en (pyproject.toml)
311
- triton==3.4.0
312
  # via torch
313
- typer==0.20.0
314
- # via gradio
 
 
 
 
315
  typing-extensions==4.15.0
316
  # via
 
317
  # anyio
318
  # cryptography
319
  # exceptiongroup
@@ -323,25 +423,31 @@ typing-extensions==4.15.0
323
  # huggingface-hub
324
  # librosa
325
  # mcp
 
326
  # pydantic
327
  # pydantic-core
 
328
  # referencing
329
  # spaces
330
  # starlette
331
  # torch
332
- # typer
333
  # typing-inspection
334
  # uvicorn
335
  typing-inspection==0.4.2
336
  # via
 
337
  # mcp
338
  # pydantic
339
  # pydantic-settings
340
- tzdata==2025.2
341
  # via pandas
342
- urllib3==2.5.0
343
  # via requests
344
- uvicorn==0.38.0
345
  # via
346
  # gradio
347
  # mcp
 
 
 
 
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
3
+ accelerate==1.13.0
4
+ # via kyutai-stt-2-6b-en
5
  aiofiles==24.1.0
6
  # via gradio
7
+ aiohappyeyeballs==2.6.1
8
+ # via aiohttp
9
+ aiohttp==3.13.4
10
+ # via fsspec
11
+ aiosignal==1.4.0
12
+ # via aiohttp
13
  annotated-doc==0.0.4
14
+ # via
15
+ # fastapi
16
+ # typer
17
  annotated-types==0.7.0
18
  # via pydantic
19
+ anyio==4.13.0
20
  # via
21
  # gradio
22
  # httpx
23
  # mcp
24
  # sse-starlette
25
  # starlette
26
+ async-timeout==5.0.1 ; python_full_version < '3.11'
27
+ # via aiohttp
28
+ attrs==26.1.0
29
  # via
30
+ # aiohttp
31
  # jsonschema
32
  # referencing
33
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
34
+ # via
35
+ # gradio
36
+ # standard-aifc
37
+ # standard-sunau
38
  audioread==3.1.0
39
  # via librosa
40
  brotli==1.2.0
41
  # via gradio
42
+ certifi==2026.2.25
43
  # via
44
  # httpcore
45
  # httpx
 
48
  # via
49
  # cryptography
50
  # soundfile
51
+ charset-normalizer==3.4.6
52
  # via requests
53
  click==8.3.1
54
  # via
55
  # typer
56
  # uvicorn
57
+ colorama==0.4.6 ; sys_platform == 'win32'
58
+ # via
59
+ # click
60
+ # tqdm
61
+ cryptography==46.0.6
62
  # via pyjwt
63
+ datasets==4.8.4
64
  decorator==5.2.1
65
  # via librosa
66
+ dill==0.4.1
67
+ # via
68
+ # datasets
69
+ # multiprocess
70
+ exceptiongroup==1.3.1 ; python_full_version < '3.11'
71
  # via anyio
72
+ fastapi==0.135.2
73
  # via gradio
74
  ffmpy==1.0.0
75
  # via gradio
76
+ filelock==3.25.2
77
  # via
78
+ # datasets
79
  # huggingface-hub
80
  # torch
81
+ frozenlist==1.8.0
 
82
  # via
83
+ # aiohttp
84
+ # aiosignal
85
+ fsspec==2026.2.0
86
+ # via
87
+ # datasets
88
  # gradio-client
89
  # huggingface-hub
90
  # torch
91
+ gradio==6.10.0
92
  # via
93
+ # kyutai-stt-2-6b-en
94
  # spaces
95
+ gradio-client==2.4.0
96
+ # via
97
+ # gradio
98
+ # hf-gradio
99
  groovy==0.1.2
100
  # via gradio
101
  h11==0.16.0
102
  # via
103
  # httpcore
104
  # uvicorn
105
+ hf-gradio==0.3.0
106
+ # via gradio
107
+ hf-xet==1.4.2 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
108
  # via huggingface-hub
109
  httpcore==1.0.9
110
  # via httpx
111
  httpx==0.28.1
112
  # via
113
+ # datasets
114
  # gradio
115
  # gradio-client
116
+ # huggingface-hub
117
  # mcp
118
  # safehttpx
119
  # spaces
120
  httpx-sse==0.4.3
121
  # via mcp
122
+ huggingface-hub==1.8.0
123
  # via
124
  # accelerate
125
+ # datasets
126
  # gradio
127
  # gradio-client
128
  # tokenizers
 
132
  # anyio
133
  # httpx
134
  # requests
135
+ # yarl
136
  jinja2==3.1.6
137
  # via
138
  # gradio
139
  # torch
140
+ joblib==1.5.3
141
  # via
142
  # librosa
143
  # scikit-learn
144
+ jsonschema==4.26.0
145
  # via mcp
146
  jsonschema-specifications==2025.9.1
147
  # via jsonschema
148
+ lazy-loader==0.5
149
  # via librosa
150
  librosa==0.11.0
151
+ # via kyutai-stt-2-6b-en
152
+ llvmlite==0.46.0
153
  # via numba
154
  markdown-it-py==4.0.0
155
  # via rich
 
157
  # via
158
  # gradio
159
  # jinja2
160
+ mcp==1.26.0
161
  # via gradio
162
  mdurl==0.1.2
163
  # via markdown-it-py
 
165
  # via sympy
166
  msgpack==1.1.2
167
  # via librosa
168
+ multidict==6.7.1
169
+ # via
170
+ # aiohttp
171
+ # yarl
172
+ multiprocess==0.70.19
173
+ # via datasets
174
+ networkx==3.4.2 ; python_full_version < '3.11'
175
  # via torch
176
+ networkx==3.6.1 ; python_full_version >= '3.11'
177
+ # via torch
178
+ numba==0.64.0
179
  # via librosa
180
+ numpy==2.2.6 ; python_full_version < '3.11'
181
+ # via
182
+ # accelerate
183
+ # datasets
184
+ # gradio
185
+ # librosa
186
+ # numba
187
+ # pandas
188
+ # scikit-learn
189
+ # scipy
190
+ # soundfile
191
+ # soxr
192
+ # transformers
193
+ numpy==2.4.4 ; python_full_version >= '3.11'
194
  # via
195
  # accelerate
196
+ # datasets
197
  # gradio
198
  # librosa
199
  # numba
 
203
  # soundfile
204
  # soxr
205
  # transformers
206
+ nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
207
  # via
208
  # nvidia-cudnn-cu12
209
  # nvidia-cusolver-cu12
210
  # torch
211
+ nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
212
  # via torch
213
+ nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
214
  # via torch
215
+ nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
216
  # via torch
217
+ nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
218
  # via torch
219
+ nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
220
  # via torch
221
+ nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
222
  # via torch
223
+ nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
224
  # via torch
225
+ nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
226
  # via torch
227
+ nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
228
  # via
229
  # nvidia-cusolver-cu12
230
  # torch
231
+ nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
232
  # via torch
233
+ nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
234
  # via torch
235
+ nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
236
  # via
237
  # nvidia-cufft-cu12
238
  # nvidia-cusolver-cu12
239
  # nvidia-cusparse-cu12
240
  # torch
241
+ nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
242
+ # via torch
243
+ nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
244
  # via torch
245
+ orjson==3.11.7
246
  # via gradio
247
+ packaging==26.0
248
  # via
249
  # accelerate
250
+ # datasets
251
  # gradio
252
  # gradio-client
253
  # huggingface-hub
 
255
  # pooch
256
  # spaces
257
  # transformers
258
+ pandas==2.3.3 ; python_full_version < '3.11'
259
+ # via
260
+ # datasets
261
+ # gradio
262
+ pandas==3.0.2 ; python_full_version >= '3.11'
263
+ # via
264
+ # datasets
265
+ # gradio
266
+ pillow==12.1.1
267
  # via gradio
268
+ platformdirs==4.9.4
269
  # via pooch
270
+ pooch==1.9.0
271
  # via librosa
272
+ propcache==0.4.1
273
+ # via
274
+ # aiohttp
275
+ # yarl
276
  psutil==5.9.8
277
  # via
278
  # accelerate
279
  # spaces
280
+ pyarrow==23.0.1
281
+ # via datasets
282
+ pycparser==3.0 ; implementation_name != 'PyPy'
283
  # via cffi
284
+ pydantic==2.12.5
285
  # via
286
  # fastapi
287
  # gradio
 
290
  # spaces
291
  pydantic-core==2.41.5
292
  # via pydantic
293
+ pydantic-settings==2.13.1
294
  # via mcp
295
  pydub==0.25.1
296
  # via gradio
297
+ pygments==2.20.0
298
  # via rich
299
+ pyjwt==2.12.1
300
  # via mcp
301
  python-dateutil==2.9.0.post0
302
  # via pandas
303
+ python-dotenv==1.2.2
304
  # via pydantic-settings
305
+ python-multipart==0.0.22
306
  # via
307
  # gradio
308
  # mcp
309
+ pytz==2026.1.post1
310
+ # via
311
+ # gradio
312
+ # pandas
313
+ pywin32==311 ; sys_platform == 'win32'
314
+ # via mcp
315
  pyyaml==6.0.3
316
  # via
317
  # accelerate
318
+ # datasets
319
  # gradio
320
  # huggingface-hub
321
  # transformers
 
323
  # via
324
  # jsonschema
325
  # jsonschema-specifications
326
+ regex==2026.3.32
327
  # via transformers
328
+ requests==2.33.1
329
  # via
330
+ # datasets
331
  # pooch
332
  # spaces
333
+ rich==14.3.3
 
334
  # via typer
335
  rpds-py==0.30.0
336
  # via
 
342
  # via
343
  # accelerate
344
  # transformers
345
+ scikit-learn==1.7.2 ; python_full_version < '3.11'
346
  # via librosa
347
+ scikit-learn==1.8.0 ; python_full_version >= '3.11'
348
+ # via librosa
349
+ scipy==1.15.3 ; python_full_version < '3.11'
350
+ # via
351
+ # librosa
352
+ # scikit-learn
353
+ scipy==1.17.1 ; python_full_version >= '3.11'
354
  # via
355
  # librosa
356
  # scikit-learn
357
  semantic-version==2.10.0
358
  # via gradio
359
+ setuptools==82.0.1 ; python_full_version >= '3.12'
360
+ # via torch
361
  shellingham==1.5.4
362
  # via typer
363
  six==1.17.0
 
366
  # via librosa
367
  soxr==1.0.0
368
  # via librosa
369
+ sse-starlette==3.3.4
 
 
370
  # via mcp
371
+ standard-aifc==3.13.0 ; python_full_version >= '3.13'
372
+ # via
373
+ # audioread
374
+ # librosa
375
+ standard-chunk==3.13.0 ; python_full_version >= '3.13'
376
+ # via standard-aifc
377
+ standard-sunau==3.13.0 ; python_full_version >= '3.13'
378
+ # via
379
+ # audioread
380
+ # librosa
381
+ starlette==0.52.1
382
  # via
383
  # fastapi
384
  # gradio
385
  # mcp
386
+ # sse-starlette
387
  sympy==1.14.0
388
  # via torch
389
  threadpoolctl==3.6.0
390
  # via scikit-learn
391
+ tokenizers==0.22.2
392
  # via transformers
393
  tomlkit==0.13.3
394
  # via gradio
395
+ torch==2.9.1
396
  # via
 
397
  # accelerate
398
+ # kyutai-stt-2-6b-en
399
+ tqdm==4.67.3
400
  # via
401
+ # datasets
402
  # huggingface-hub
403
  # transformers
404
+ transformers==5.4.0
405
+ # via kyutai-stt-2-6b-en
406
+ triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
407
  # via torch
408
+ typer==0.24.1
409
+ # via
410
+ # gradio
411
+ # hf-gradio
412
+ # huggingface-hub
413
+ # transformers
414
  typing-extensions==4.15.0
415
  # via
416
+ # aiosignal
417
  # anyio
418
  # cryptography
419
  # exceptiongroup
 
423
  # huggingface-hub
424
  # librosa
425
  # mcp
426
+ # multidict
427
  # pydantic
428
  # pydantic-core
429
+ # pyjwt
430
  # referencing
431
  # spaces
432
  # starlette
433
  # torch
 
434
  # typing-inspection
435
  # uvicorn
436
  typing-inspection==0.4.2
437
  # via
438
+ # fastapi
439
  # mcp
440
  # pydantic
441
  # pydantic-settings
442
+ tzdata==2025.3 ; python_full_version < '3.11' or sys_platform == 'emscripten' or sys_platform == 'win32'
443
  # via pandas
444
+ urllib3==2.6.3
445
  # via requests
446
+ uvicorn==0.42.0
447
  # via
448
  # gradio
449
  # mcp
450
+ xxhash==3.6.0
451
+ # via datasets
452
+ yarl==1.23.0
453
+ # via aiohttp
style.css CHANGED
@@ -28,12 +28,21 @@
28
  font-weight: 400;
29
  }
30
 
31
- /* Main content card */
 
 
 
 
 
 
 
 
 
 
 
32
  .main-card {
33
- background: white;
34
  border-radius: 1rem;
35
  padding: 1.5rem;
36
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
37
  margin-bottom: 1.5rem;
38
  }
39
 
@@ -42,10 +51,9 @@
42
  margin-bottom: 1.5rem;
43
  }
44
 
45
- /* Transcription output styling */
46
  .transcription-output {
47
  min-height: 120px;
48
- background: #f8fafc;
49
  border-radius: 0.75rem;
50
  padding: 1rem;
51
  font-size: 1rem;
@@ -57,27 +65,6 @@
57
  margin-top: 2rem;
58
  }
59
 
60
- /* Footer */
61
- .footer-container {
62
- text-align: center;
63
- padding: 1.5rem 1rem;
64
- margin-top: 2rem;
65
- border-top: 1px solid #e2e8f0;
66
- font-size: 0.875rem;
67
- color: #64748b;
68
- }
69
-
70
- .footer-link {
71
- color: #667eea;
72
- text-decoration: none;
73
- font-weight: 500;
74
- transition: color 0.2s;
75
- }
76
-
77
- .footer-link:hover {
78
- color: #764ba2;
79
- }
80
-
81
  /* Mobile optimizations */
82
  @media (max-width: 640px) {
83
  .header-title {
@@ -111,14 +98,3 @@
111
  transform: translateY(-2px) !important;
112
  box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3) !important;
113
  }
114
-
115
- /* Info banner */
116
- .info-banner {
117
- background: #eff6ff;
118
- border-left: 4px solid #3b82f6;
119
- padding: 1rem;
120
- border-radius: 0.5rem;
121
- margin-bottom: 1.5rem;
122
- font-size: 0.875rem;
123
- color: #1e40af;
124
- }
 
28
  font-weight: 400;
29
  }
30
 
31
+ /* Info banner */
32
+ .info-banner {
33
+ border-left: 4px solid #3b82f6;
34
+ padding: 1rem;
35
+ border-radius: 0.5rem;
36
+ margin-bottom: 1.5rem;
37
+ font-size: 0.875rem;
38
+ background: var(--background-fill-secondary);
39
+ color: var(--body-text-color);
40
+ }
41
+
42
+ /* Main content card — no hardcoded background; the gr.Column container inherits the theme's */
43
  .main-card {
 
44
  border-radius: 1rem;
45
  padding: 1.5rem;
 
46
  margin-bottom: 1.5rem;
47
  }
48
 
 
51
  margin-bottom: 1.5rem;
52
  }
53
 
54
+ /* Transcription output styling — no hardcoded background */
55
  .transcription-output {
56
  min-height: 120px;
 
57
  border-radius: 0.75rem;
58
  padding: 1rem;
59
  font-size: 1rem;
 
65
  margin-top: 2rem;
66
  }
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  /* Mobile optimizations */
69
  @media (max-width: 640px) {
70
  .header-title {
 
98
  transform: translateY(-2px) !important;
99
  box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3) !important;
100
  }
 
 
 
 
 
 
 
 
 
 
 
uv.lock CHANGED
The diff for this file is too large to render. See raw diff