import gradio as gr
from transformers import pipeline
import torch
# Select device (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the chat model. FLAN-T5 is an encoder-decoder (seq2seq) model,
# so it runs under the "text2text-generation" task, not "text-generation".
pipe = pipeline("text2text-generation", model="google/flan-t5-xxl", device=device)

# Load the speech-recognition model once at startup instead of on every request.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
def chat(message, history):
    prompt = f"User: {message}\nAssistant:"
    sequences = pipe(prompt, max_new_tokens=256)
    # A text2text-generation pipeline returns only the generated answer,
    # without echoing the prompt, so no "Assistant:" split is needed.
    response = sequences[0]["generated_text"].strip()
    history.append((message, response))
    return history, history
def transcribe_and_send(audio, history):
    if audio is None:
        # No audio received; return the history unchanged for both outputs.
        return history, history
    try:
        transcription = asr(audio)["text"]
        return chat(transcription, history)
    except Exception as e:
        print(f"Transcription error: {e}")
        # The chatbot output expects a list of message pairs, not a bare string.
        history.append((None, "Error while processing the audio."))
        return history, history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Voice recording")
        text_input = gr.Textbox(placeholder="Enter a message...")
        send_button = gr.Button("Send (text)")
        clear_button = gr.Button("Clear chat")

    send_button.click(chat, [text_input, state], [state, chatbot])
    text_input.submit(chat, [text_input, state], [state, chatbot])
    # gr.Audio has no .submit event; trigger transcription when a recording stops.
    audio_input.stop_recording(transcribe_and_send, [audio_input, state], [state, chatbot])
    # Reset both the visible chat and the stored history.
    clear_button.click(lambda: ([], []), outputs=[chatbot, state])

if __name__ == "__main__":
    demo.launch()
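
# Optional tweaks (standard Gradio API): calling demo.queue() before launch()
# enables request queuing under concurrent load, and demo.launch(share=True)
# creates a temporary public share link for the app.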