Spaces:
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ import torch
|
|
| 11 |
import torch.nn as nn
|
| 12 |
import torch.nn.functional as F
|
| 13 |
import numpy as np
|
|
|
|
| 14 |
import time
|
| 15 |
from transformers import PreTrainedTokenizerFast
|
| 16 |
from huggingface_hub import hf_hub_download, login
|
|
@@ -249,9 +250,7 @@ def transcribe(audio, model_name):
|
|
| 249 |
if wav.ndim > 1:
|
| 250 |
wav = wav.mean(axis=-1)
|
| 251 |
if sr != 16000:
|
| 252 |
-
|
| 253 |
-
length = int(len(wav) * 16000 / sr)
|
| 254 |
-
wav = np.interp(np.linspace(0, len(wav)-1, length), np.arange(len(wav)), wav).astype(np.float32)
|
| 255 |
elif isinstance(audio, tuple):
|
| 256 |
sr, wav = audio
|
| 257 |
wav = np.array(wav, dtype=np.float32)
|
|
@@ -260,9 +259,7 @@ def transcribe(audio, model_name):
|
|
| 260 |
if np.abs(wav).max() > 1.0:
|
| 261 |
wav = wav / 32768.0
|
| 262 |
if sr != 16000:
|
| 263 |
-
|
| 264 |
-
length = int(len(wav) * 16000 / sr)
|
| 265 |
-
wav = np.interp(np.linspace(0, len(wav)-1, length), np.arange(len(wav)), wav).astype(np.float32)
|
| 266 |
else:
|
| 267 |
return "Unsupported audio format"
|
| 268 |
|
|
|
|
| 11 |
import torch.nn as nn
|
| 12 |
import torch.nn.functional as F
|
| 13 |
import numpy as np
|
| 14 |
+
import librosa
|
| 15 |
import time
|
| 16 |
from transformers import PreTrainedTokenizerFast
|
| 17 |
from huggingface_hub import hf_hub_download, login
|
|
|
|
| 250 |
if wav.ndim > 1:
|
| 251 |
wav = wav.mean(axis=-1)
|
| 252 |
if sr != 16000:
|
| 253 |
+
wav = librosa.resample(wav, orig_sr=sr, target_sr=16000)
|
|
|
|
|
|
|
| 254 |
elif isinstance(audio, tuple):
|
| 255 |
sr, wav = audio
|
| 256 |
wav = np.array(wav, dtype=np.float32)
|
|
|
|
| 259 |
if np.abs(wav).max() > 1.0:
|
| 260 |
wav = wav / 32768.0
|
| 261 |
if sr != 16000:
|
| 262 |
+
wav = librosa.resample(wav, orig_sr=sr, target_sr=16000)
|
|
|
|
|
|
|
| 263 |
else:
|
| 264 |
return "Unsupported audio format"
|
| 265 |
|