# voicebot/app.py — mgokg's Hugging Face Space (commit badf650)
import gradio as gr
from transformers import pipeline
import torch
# Select device: GPU if available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the chat model once at module import.
# NOTE(review): the original comment called this "the smaller model", but
# flan-t5-xxl is the largest FLAN-T5 checkpoint — confirm which size is intended.
# NOTE(review): FLAN-T5 is an encoder-decoder (seq2seq) model; the documented
# pipeline task for T5-family models is "text2text-generation", not
# "text-generation" — verify this pipeline actually loads. Changing the task
# would also change the output format that chat() parses, so both must be
# updated together.
pipe = pipeline("text-generation", model="google/flan-t5-xxl", device=device)
def chat(message, history):
    """Generate a reply to `message` and append the exchange to `history`.

    Args:
        message: The user's text input.
        history: Gradio chat history, a list of (user, assistant) tuples;
            mutated in place.

    Returns:
        (history, history) — duplicated so one copy feeds `gr.State` and the
        other the `gr.Chatbot` display.
    """
    prompt = f"User: {message}\nAssistant:"
    sequences = pipe(prompt, max_length=256)
    generated = sequences[0]['generated_text']
    # The text-generation pipeline echoes the prompt, so the answer follows the
    # "Assistant:" marker. Guard against the marker being absent (e.g. a
    # pipeline that returns only new text) instead of crashing with IndexError.
    parts = generated.split("Assistant:")
    response = parts[1].strip() if len(parts) > 1 else generated.strip()
    history.append((message, response))
    return history, history
def transcribe_and_send(audio, history):
    """Transcribe an audio recording and feed the text to the chat model.

    Args:
        audio: Filepath of the recorded audio (from `gr.Audio(type="filepath")`),
            or None when no recording was made.
        history: Gradio chat history, a list of (user, assistant) tuples;
            mutated in place.

    Returns:
        (history, history) for the `gr.State` and `gr.Chatbot` outputs.
        Errors are appended to the history as an assistant message — the
        original code returned a bare string as the second value, which the
        Chatbot component (expecting a list of pairs) cannot render.
    """
    if audio is None:
        history.append((None, "Keine Audioaufnahme erhalten."))
        return history, history
    try:
        # Build the ASR pipeline lazily and cache it on the function, so the
        # expensive model load happens once instead of on every recording.
        asr = getattr(transcribe_and_send, "_asr", None)
        if asr is None:
            asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
            transcribe_and_send._asr = asr
        transcription = asr(audio)["text"]
        return chat(transcription, history)
    except Exception as e:
        # Boundary handler: log and surface a user-facing error in the chat.
        print(f"Fehler bei der Transkription: {e}")
        history.append((None, "Fehler bei der Audioverarbeitung."))
        return history, history
# Build the UI: a chatbot display, shared history state, and audio/text inputs.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    # Chat history lives in gr.State so it survives between events.
    state = gr.State([])
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Sprachaufnahme")
        text_input = gr.Textbox(placeholder="Nachricht eingeben...")
    send_button = gr.Button("Senden (Text)")
    clear_button = gr.Button("Chat löschen")
    send_button.click(chat, [text_input, state], [state, chatbot])
    # gr.Audio has no .submit event; .change fires after a recording or upload
    # completes, which is the intended trigger here.
    audio_input.change(transcribe_and_send, [audio_input, state], [state, chatbot])
    # Reset BOTH the display and the stored history — clearing only the
    # chatbot left stale history in `state`, so old messages reappeared on
    # the next turn.
    clear_button.click(lambda: ([], []), outputs=[chatbot, state])
    text_input.submit(chat, [text_input, state], [state, chatbot])

if __name__ == "__main__":
    demo.launch()