Spaces:

speech-uk
/

vad-marblenet

Running

App Files Community

Yehor committed on May 28, 2025

Commit

9720263

1 Parent(s): 16b04a0

Improve the code

Browse files

Files changed (3) hide show

.gitignore +9 -0
app.py +140 -21
requirements.txt +1 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+__pycache__
+.venv
+.ruff_cache
+rttm_outputs
+vad_frame_outputs
+tmp.json
+tmp.zip

app.py CHANGED Viewed

@@ -1,14 +1,18 @@
 import os
 from os.path import basename
 from zipfile import ZipFile, ZIP_DEFLATED
 from shutil import rmtree
 import sentry_sdk
-import sphn
 import gradio as gr
 from inference import inference_file
-if os.environ["SENTRY_DSN"]:
     sentry_sdk.init(
         dsn=os.environ["SENTRY_DSN"],
         send_default_pii=True,
@@ -16,21 +20,76 @@ if os.environ["SENTRY_DSN"]:
     print("Sentry SDK is activated")
-def extract_chunks(file, min_sec, max_sec):
     archive_name = "tmp.zip"
     n_files = 0
     duration_secs = 0
     with ZipFile(
-        archive_name, "w", compression=ZIP_DEFLATED, allowZip64=True, compresslevel=9
     ) as zip_file:
-        results = inference_file(file)
-        filenames = [it["filename"] for it in results]
-        durations = sphn.durations(filenames)
         for idx, result in enumerate(results):
-            duration = durations[idx]
             print(result, duration)
@@ -56,15 +115,75 @@ def extract_chunks(file, min_sec, max_sec):
     return archive_name
-demo = gr.Interface(
-    title="MarbleNet",
-    fn=extract_chunks,
-    inputs=[
-        gr.File(label="WAV file to process", file_count="single", file_types=[".wav"]),
-        gr.Number(label="Minimum seconds", value=0.1, minimum=0.01, maximum=59.99),
-        gr.Number(label="Maximum seconds", value=30, minimum=0.02, maximum=60),
-    ],
-    outputs=[gr.File(label="ZIP file with voice chunks")],
-    submit_btn="Inference",
-)
-demo.launch()

 import os
+import sys
 from os.path import basename
 from zipfile import ZipFile, ZIP_DEFLATED
 from shutil import rmtree
+from importlib.metadata import version
+import torch
 import sentry_sdk
 import gradio as gr
+from gradio.themes import Soft
 from inference import inference_file
+if "SENTRY_DSN" in os.environ:
     sentry_sdk.init(
         dsn=os.environ["SENTRY_DSN"],
         send_default_pii=True,
     print("Sentry SDK is activated")
+use_cuda = torch.cuda.is_available()
+if use_cuda:
+    print("CUDA is available, setting correct inference_device variable.")
+    device = "cuda"
+else:
+    device = "cpu"
+# https://www.tablesgenerator.com/markdown_tables
+authors_table = """
+## Authors
+Follow them in social networks and **contact** if you need any help or have any questions:
+| **Yehor Smoliakov** |
+|-------------------------------------------------------------------------------------------------|
+| https://t.me/smlkw in Telegram                                                                  |
+| https://x.com/yehor_smoliakov at X                                                              |
+| https://github.com/egorsmkv at GitHub                                                           |
+| https://huggingface.co/Yehor at Hugging Face                                                    |
+| or use egorsmkv@gmail.com                                                                       |
+""".strip()
+tech_env = f"""
+#### Environment
+- Python: {sys.version}
+- Torch device: {device}
+#### Models
+##### Acoustic model (Voice Activity Detection)
+- Name: MarbleNet
+- URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/vad_marblenet
+""".strip()
+tech_libraries = f"""
+#### Libraries
+- torch: {version("torch")}
+- sphn: {version("sphn")}
+- gradio: {version("gradio")}
+- sentry_sdk: {version("sentry_sdk")}
+""".strip()
+description_head = """
+# MarbleNet
+Split an audio file to voice chunks.
+""".strip()
+concurrency_limit = 5
+def extract_chunks(wav_file, min_sec, max_sec):
     archive_name = "tmp.zip"
     n_files = 0
     duration_secs = 0
     with ZipFile(
+        archive_name,
+        "w",
+        compression=ZIP_DEFLATED,
+        allowZip64=True,
+        compresslevel=9,
     ) as zip_file:
+        try:
+            results = inference_file(wav_file)
+        except Exception as exc:
+            sentry_sdk.capture_exception(exc)
+            raise gr.Error("Something went wrong, we will be notified about this")
         for idx, result in enumerate(results):
+            duration = result["speech"]["duration"]
             print(result, duration)
     return archive_name
+def create_app():
+    tab = gr.Blocks(
+        title="MarbleNet",
+        analytics_enabled=False,
+        theme=Soft(),
+    )
+    with tab:
+        gr.Markdown(description_head)
+        gr.Markdown("## Usage")
+        with gr.Column():
+            wav_file = gr.File(
+                label="WAV file to process",
+                file_count="single",
+                file_types=[".wav"],
+            )
+            min_sec = gr.Number(
+                label="Minimum seconds", value=0.1, minimum=0.01, maximum=59.99
+            )
+            max_sec = gr.Number(
+                label="Maximum seconds", value=30, minimum=0.02, maximum=60
+            )
+        with gr.Column():
+            zip_file = gr.File(label="ZIP file with voice chunks")
+        gr.Button("Run").click(
+            extract_chunks,
+            concurrency_limit=concurrency_limit,
+            inputs=[wav_file, min_sec, max_sec],
+            outputs=[zip_file],
+        )
+    return tab
+def create_env():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(tech_env)
+        gr.Markdown(tech_libraries)
+    return tab
+def create_authors():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(authors_table)
+    return tab
+def create_demo():
+    app_tab = create_app()
+    authors_tab = create_authors()
+    env_tab = create_env()
+    return gr.TabbedInterface(
+        [app_tab, authors_tab, env_tab],
+        tab_names=[
+            "🎙️ VAD",
+            "👥 Authors",
+            "📦 Environment, Models, and Libraries",
+        ],
+    )
+if __name__ == "__main__":
+    demo = create_demo()
+    demo.queue()
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -11,4 +11,4 @@ plotly
 gradio
-sentry-sdk


 gradio
+sentry-sdk[huggingface_hub]