Spaces:
Sleeping
Sleeping
Versión inicial de Gradio
Browse files- app.py +48 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import gradio as gr
|
3 |
+
import numpy as np
|
4 |
+
import librosa
|
5 |
+
|
6 |
+
# Load the three fine-tuned speech-recognition models once at start-up so
# every request reuses the same pipeline objects.
_ASR_TASK = "automatic-speech-recognition"
pipe_model_1, pipe_model_2, pipe_model_3 = (
    pipeline(_ASR_TASK, model=repo_id)
    for repo_id in (
        "IABDs8a/AfinandoElEntrenamiento",
        "IABDs8a/whisper-base-full",
        "IABDs8a/whisper-tiny-top3",
    )
)
|
10 |
+
|
11 |
+
def transcribe(audio, model_choice):
    """Transcribe an audio file with the selected speech-recognition model.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when no
            audio was supplied.
        model_choice: Label of the model to use. "AfinandoElEntrenamiento"
            and "Whisper Base Full" select the first and second pipelines;
            any other value selects the third one.

    Returns:
        The transcribed text, or an empty string when there is no audio
        to transcribe (nothing supplied, or a file with zero samples).
    """
    # Nothing was uploaded or recorded: bail out instead of crashing
    # inside librosa.load(None).
    if audio is None:
        return ""

    if model_choice == "AfinandoElEntrenamiento":
        pipe = pipe_model_1
    elif model_choice == "Whisper Base Full":
        pipe = pipe_model_2
    else:
        pipe = pipe_model_3

    # Read the file, resampled to 16 kHz. mono=True makes librosa down-mix
    # multi-channel audio, so the result is always a 1-D array and no
    # separate to_mono step is needed.
    y, sr = librosa.load(audio, sr=16000, mono=True)

    # Make sure the samples are 32-bit floats.
    y = y.astype(np.float32, copy=False)

    # A zero-length signal has no peak to normalise against and nothing
    # to transcribe.
    if y.size == 0:
        return ""

    # Peak-normalise, skipping pure silence: dividing by a zero peak would
    # fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Run the transcription on the raw samples.
    result = pipe({"sampling_rate": sr, "raw": y})
    return result["text"]
|
35 |
+
|
36 |
+
# Gradio interface: an audio input (delivered to the callback as a file
# path) plus a dropdown to pick which model performs the transcription.
audio_input = gr.Audio(
    type="filepath",
    label="Sube un archivo de audio o graba desde el micrófono",
)
model_selector = gr.Dropdown(
    choices=["AfinandoElEntrenamiento", "Whisper Base Full", "Whisper Tiny Top 3"],
    label="Selecciona el modelo",
    value="Whisper Base Full",
)

demo = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, model_selector],
    outputs="text",
    title="Transcripción de Audio con Whisper",
    description="Sube un archivo de audio o graba desde el micrófono para obtener su transcripción utilizando los modelos Whisper entrenados.",
)

# Start the web app; share=True additionally requests a public share link.
demo.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
diffusers
|
2 |
+
torch
|
3 |
+
transformers
|
4 |
+
accelerate
|
5 |
+
numpy
|
6 |
+
gradio
|
7 |
+
torchaudio
|
8 |
+
librosa
|