|
import gradio as gr |
|
import torch |
|
from transformers import pipeline |
|
|
|
|
|
|
|
# Pick the fastest available device; the pipeline accepts the plain
# "cuda"/"cpu" device string directly.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Whisper large-v3-turbo ASR pipeline. NOTE: this downloads/loads the model
# at import time, which can take a while on first run.
speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo", device=device)
|
|
|
|
|
def audio_to_audio_chatbot(audio):
    """Transcribe a recorded audio clip and echo the transcript as a reply.

    Parameters
    ----------
    audio : str | None
        Filepath to the recorded audio (Gradio ``type="filepath"``), or
        ``None`` when nothing was recorded/uploaded.

    Returns
    -------
    str
        The bot's text reply (or a German error message when no audio
        was provided). A single string, matching the interface's single
        Textbox output.
    """
    # Guard clause: the interface declares ONE output component, so we must
    # return a single value here (the original returned a 2-tuple, which
    # Gradio would mis-render against a lone Textbox).
    if audio is None:
        return "Bitte eine Audio-Datei hochladen."

    # Run ASR; the pipeline returns a dict with the transcript under "text".
    text = speech_to_text(audio)["text"]
    print(f"User: {text}")

    # Placeholder "chatbot": simply echo the transcription back.
    response_text = f"Du hast gesagt: {text}"
    print(f"Bot: {response_text}")

    # BUG FIX: the original fell off the end and implicitly returned None,
    # leaving the Textbox output empty. Return the reply explicitly.
    return response_text
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the demo UI. NOTE(review): the description promises an audio
    # reply, but the output component is a Textbox — confirm which is
    # intended; strings left untouched here.
    iface = gr.Interface(
        fn=audio_to_audio_chatbot,
        # Gradio 4.x renamed `source=` to `sources=[...]`; the old keyword
        # raises TypeError on current Gradio versions.
        inputs=gr.Audio(sources=["microphone"], type="filepath"),
        outputs=gr.Textbox(),
        title="Audio-zu-Audio-Chatbot (Streaming)",
        description="Spreche in das Mikrofon und der Bot antwortet mit einer Audio-Ausgabe.",
        # Re-run the function live while recording.
        live=True,
    )

    iface.launch()
|
|
|
|