import os
import sys
from os import rename
from os.path import basename
from zipfile import ZipFile, ZIP_DEFLATED
from shutil import rmtree
from importlib.metadata import version

import sphn
import torch
import sentry_sdk
import gradio as gr
from gradio.themes import Soft

from inference import inference_file

# Error reporting is opt-in: Sentry is only initialised when a DSN is supplied
# through the SENTRY_DSN environment variable.
# NOTE(review): send_default_pii=True is passed to the SDK - confirm that
# attaching default PII to events is intended for this deployment.
if os.environ.get("SENTRY_DSN") is not None:
    sentry_sdk.init(dsn=os.environ["SENTRY_DSN"], send_default_pii=True)
    print("Sentry SDK is activated")


# Pick the torch device once at import time; it is shown in the environment tab.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"

if use_cuda:
    print("CUDA is available, setting correct device variable.")

# Markdown shown on the "Authors" tab (rendered by create_authors).
# The table layout was produced with:
# https://www.tablesgenerator.com/markdown_tables
# Leading/trailing newlines of the literal are removed by .strip().
authors_table = """
## Authors
Follow them in social networks and **contact** if you need any help or have any questions:
| **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram                                                                  |
| https://x.com/yehor_smoliakov at X                                                              |
| https://github.com/egorsmkv at GitHub                                                           |
| https://huggingface.co/Yehor at Hugging Face                                                    |
| or use egorsmkv@gmail.com                                                                       |
""".strip()

# Markdown for the environment tab: interpreter version, the torch device
# selected above, and the VAD model in use.
tech_env = "\n".join(
    (
        "#### Environment",
        f"- Python: {sys.version}",
        f"- Torch device: {device}",
        "",
        "#### Models",
        "",
        "##### Acoustic model (Voice Activity Detection)",
        "- Name: MarbleNet",
        "- URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/vad_marblenet",
    )
)

# Markdown list of the installed versions of the main dependencies, looked up
# from package metadata at import time.
tech_libraries = "\n".join(
    ["#### Libraries"]
    + [
        f"- {package}: {version(package)}"
        for package in ("torch", "sphn", "gradio", "sentry_sdk")
    ]
).strip()

# Markdown heading and one-line description rendered at the top of the main
# tab (see create_app).
description_head = """
# MarbleNet

Split an audio file to voice chunks.
""".strip()

# Passed as `concurrency_limit` to the "Run" button's click event in create_app.
concurrency_limit = 5


def inference_func(wav_file, min_sec, max_sec):
    """Split an audio file into voice chunks and return them as a ZIP archive.

    The file is validated (readable, at least 0.1 s long, mono), moved to
    ``input.wav`` in the working directory and passed to ``inference_file``.
    Every returned chunk whose duration lies strictly between ``min_sec`` and
    ``max_sec`` is written to the archive; the ``chunks`` directory is removed
    afterwards.

    Args:
        wav_file: Path of the audio file to process.
        min_sec: Chunks lasting this many seconds or less are skipped.
        max_sec: Chunks lasting this many seconds or more are skipped.

    Returns:
        The name of the created archive (``tmp.zip``), relative to the
        working directory.

    Raises:
        gr.Error: If the file cannot be read, is shorter than 0.1 s, has more
            than one channel, or if the inference itself fails.
    """
    # Function-scope import: the module itself only imports ``rmtree``.
    from shutil import move

    # NOTE(review): the archive, the input copy and the "chunks" directory all
    # use fixed names, so concurrent requests share them - confirm this is
    # acceptable together with the configured concurrency limit.
    archive_name = "tmp.zip"
    n_files = 0
    duration_secs = 0

    # Validate the file
    try:
        data, sr = sphn.read(wav_file)
        duration = len(data[0]) / sr
        if duration < 0.1:
            raise gr.Error("The duration is too low")
        n_channels = len(data)
        if n_channels > 1:
            raise gr.Error(
                f"Your file must be in the mono format. The file has {n_channels} channels."
            )
    except gr.Error:
        # Validation messages are already user-facing; let them through
        # instead of re-wrapping them as a "can't read" error.
        raise
    except Exception as e:
        raise gr.Error(f"Can't read your file, the problem: {e}") from e

    # Move the upload to a fixed name. Unlike os.rename, shutil.move also
    # works when the upload lives on another filesystem than the working
    # directory, and when the target already exists on Windows.
    old_wav_file = wav_file
    wav_file = "input.wav"
    move(old_wav_file, wav_file)

    try:
        # Run the model before touching the archive so that a failed
        # inference does not leave a freshly truncated, empty ZIP behind.
        try:
            results = inference_file(wav_file)
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            raise gr.Error(
                "Something went wrong, we will be notified about this"
            ) from exc

        with ZipFile(
            archive_name,
            "w",
            compression=ZIP_DEFLATED,
            allowZip64=True,
            compresslevel=9,
        ) as zip_file:
            for result in results:
                duration = result["speech"]["duration"]

                print(result, duration)

                # Both bounds are exclusive.
                if duration <= min_sec or duration >= max_sec:
                    print("Skipping...")
                    continue

                # Store the chunk flat in the archive, without its directory.
                arc_name = basename(result["filename"])
                zip_file.write(result["filename"], arc_name)

                duration_secs += duration
                n_files += 1
    finally:
        # Remove files. Best-effort: also runs on failure, and a missing
        # directory (e.g. nothing was produced) must not mask the real outcome.
        rmtree("chunks", ignore_errors=True)

    mins = round(duration_secs / 60, 4)

    gr.Success(
        f"VAD model identified {n_files} files in interval [{min_sec}:{max_sec}], total duration = {mins} min."
    )

    return archive_name


def create_app():
    """Build the main tab: file upload, duration bounds, output and Run button.

    Returns:
        The ``gr.Blocks`` instance holding the VAD interface.
    """
    with gr.Blocks(
        title="MarbleNet",
        analytics_enabled=False,
        theme=Soft(),
    ) as tab:
        gr.Markdown(description_head)

        gr.Markdown("## Usage")

        # Inputs: the audio file and the duration window for kept chunks.
        with gr.Column():
            audio_input = gr.File(
                label="WAV file to process",
                file_count="single",
                file_types=[".wav"],
            )
            lower_bound = gr.Number(
                label="Minimum seconds",
                value=0.1,
                minimum=0.01,
                maximum=59.99,
            )
            upper_bound = gr.Number(
                label="Maximum seconds",
                value=30,
                minimum=0.02,
                maximum=60,
            )

        # Output: the archive returned by inference_func.
        with gr.Column():
            archive_output = gr.File(label="ZIP file with voice chunks")

        run_button = gr.Button("Run")
        run_button.click(
            inference_func,
            inputs=[audio_input, lower_bound, upper_bound],
            outputs=[archive_output],
            concurrency_limit=concurrency_limit,
        )

    return tab


def create_env():
    """Build the tab that shows the environment and library versions.

    Returns:
        A ``gr.Blocks`` instance rendering ``tech_env`` and ``tech_libraries``.
    """
    tab = gr.Blocks(theme=Soft())

    with tab:
        for section in (tech_env, tech_libraries):
            gr.Markdown(section)

    return tab


def create_authors():
    """Build the tab that shows the authors' contact table.

    Returns:
        A ``gr.Blocks`` instance rendering ``authors_table``.
    """
    tab = gr.Blocks(theme=Soft())

    with tab:
        gr.Markdown(authors_table)

    return tab


def create_demo():
    """Assemble the three tabs into a single tabbed interface.

    Returns:
        A ``gr.TabbedInterface`` with the VAD, authors and environment tabs,
        in that order.
    """
    # Tabs are built in the same order as before: app, authors, environment.
    app_tab = create_app()
    authors_tab = create_authors()
    env_tab = create_env()

    # Keep each label next to the tab it belongs to.
    pages = (
        ("🎙️ VAD", app_tab),
        ("👥 Authors", authors_tab),
        ("📦 Environment, Models, and Libraries", env_tab),
    )

    return gr.TabbedInterface(
        [page for _, page in pages],
        tab_names=[label for label, _ in pages],
    )


if __name__ == "__main__":
    # Build the tabbed UI, then enable queuing and launch it.
    demo = create_demo()
    demo.queue().launch()