|
import gradio as gr |
|
import torch |
|
from transformers import pipeline |
|
|
|
|
|
|
|
# Prefer GPU acceleration when CUDA is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Whisper (large-v3-turbo) automatic-speech-recognition pipeline.
# Loaded once at module import time and reused for every request.
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    device=device,
)
|
|
|
|
|
def audio_to_audio_chatbot(audio):
    """Transcribe a recorded audio file and echo the transcript back as text.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recorded audio (``gr.Microphone(type="filepath")``),
        or ``None`` when no recording has been provided yet.

    Returns
    -------
    str
        The bot's textual reply, shown in the single "Antwort" textbox.
    """
    # Guard clause: the change event also fires when the input is cleared.
    # Return a single string — the UI is wired to exactly one Textbox output,
    # so returning a tuple here would render incorrectly.
    if audio is None:
        return "Bitte eine Audio-Datei hochladen."

    # Run Whisper ASR on the audio file; the pipeline returns {"text": ...}.
    text = speech_to_text(audio)["text"]
    print(f"User: {text}")

    # Placeholder "bot": simply echo the recognized text back.
    # (Previously unreachable — an early `return text` made this dead code.)
    response_text = f"Du hast gesagt: {text}"
    print(f"Bot: {response_text}")
    return response_text
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
iface = gr.Interface( |
|
fn=audio_to_audio_chatbot, |
|
inputs=gr.Audio(type="filepath"), |
|
outputs= gr.Textbox(), |
|
#outputs=[gr.Audio(), gr.Textbox()], |
|
title="Audio-zu-Audio-Chatbot (Streaming)", |
|
description="Spreche in das Mikrofon und der Bot antwortet mit einer Audio-Ausgabe.", |
|
live=True # Aktiviert Streaming |
|
) |
|
""" |
|
# Minimal Blocks UI: the answer textbox on top, the microphone input below.
with gr.Blocks() as speech:
    with gr.Row():
        answer_box = gr.Textbox(label="Antwort")
    with gr.Row():
        mic_input = gr.Microphone(type="filepath")
    # Re-run the chatbot whenever the recorded audio value changes.
    mic_input.change(audio_to_audio_chatbot, inputs=mic_input, outputs=answer_box)

speech.launch()
|
|
|
|