stukenov committed on
Commit
7768093
·
verified ·
1 Parent(s): 99a484e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -11,6 +11,7 @@ import torch
11
  import torch.nn as nn
12
  import torch.nn.functional as F
13
  import numpy as np
 
14
  import time
15
  from transformers import PreTrainedTokenizerFast
16
  from huggingface_hub import hf_hub_download, login
@@ -249,9 +250,7 @@ def transcribe(audio, model_name):
249
  if wav.ndim > 1:
250
  wav = wav.mean(axis=-1)
251
  if sr != 16000:
252
- # Simple resample via interpolation
253
- length = int(len(wav) * 16000 / sr)
254
- wav = np.interp(np.linspace(0, len(wav)-1, length), np.arange(len(wav)), wav).astype(np.float32)
255
  elif isinstance(audio, tuple):
256
  sr, wav = audio
257
  wav = np.array(wav, dtype=np.float32)
@@ -260,9 +259,7 @@ def transcribe(audio, model_name):
260
  if np.abs(wav).max() > 1.0:
261
  wav = wav / 32768.0
262
  if sr != 16000:
263
- # Simple resample via interpolation
264
- length = int(len(wav) * 16000 / sr)
265
- wav = np.interp(np.linspace(0, len(wav)-1, length), np.arange(len(wav)), wav).astype(np.float32)
266
  else:
267
  return "Unsupported audio format"
268
 
 
11
  import torch.nn as nn
12
  import torch.nn.functional as F
13
  import numpy as np
14
+ import librosa
15
  import time
16
  from transformers import PreTrainedTokenizerFast
17
  from huggingface_hub import hf_hub_download, login
 
250
  if wav.ndim > 1:
251
  wav = wav.mean(axis=-1)
252
  if sr != 16000:
253
+ wav = librosa.resample(wav, orig_sr=sr, target_sr=16000)
 
 
254
  elif isinstance(audio, tuple):
255
  sr, wav = audio
256
  wav = np.array(wav, dtype=np.float32)
 
259
  if np.abs(wav).max() > 1.0:
260
  wav = wav / 32768.0
261
  if sr != 16000:
262
+ wav = librosa.resample(wav, orig_sr=sr, target_sr=16000)
 
 
263
  else:
264
  return "Unsupported audio format"
265