mgokg committed (verified)
Commit 51a7232 · Parent: ed1ccfd

Update app.py

Files changed (1)
  1. app.py +30 -41
app.py CHANGED
@@ -1,48 +1,37 @@
 import gradio as gr
-from transformers import pipeline
 import torch
+from transformers import pipeline
 
-# Select the device (GPU if available, otherwise CPU)
+# Load the models (once, at startup)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load the smaller model
-pipe = pipeline("text-generation", model="google/flan-t5-xxl", device=device)
-
-def chat(message, history):
-    prompt = f"User: {message}\nAssistant:"
-    sequences = pipe(prompt, max_length=256)
-    response = sequences[0]['generated_text'].split("Assistant:")[1].strip()
-    history.append((message, response))
-    return history, history
-
-def transcribe_and_send(audio, history):
-    if audio is None:
-        return history, "No audio recording received."
-
-    try:
-        asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
-        transcription = asr(audio)["text"]
-        return chat(transcription, history)
-    except Exception as e:
-        print(f"Transcription error: {e}")
-        return history, "Error during audio processing."
-
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    state = gr.State([])
-
-    with gr.Row():
-        audio_input = gr.Audio(type="filepath", label="Voice recording")
-        text_input = gr.Textbox(placeholder="Enter a message...")
-
-    text_output = gr.Textbox()
-    send_button = gr.Button("Send")
-    clear_button = gr.Button("Clear chat")
-
-    send_button.click(chat, [text_input, state], [state, chatbot])
-    #audio_input.submit(transcribe_and_send, [audio_input, state], [state, chatbot])
-    clear_button.click(lambda: [], outputs=text_output)
-    #text_input.submit(chat, [text_input, state], [state, chatbot])
+speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=device)
+text_to_speech = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech", device=device)
+
+def audio_to_audio_chatbot(audio):
+    if audio is None:
+        return None, "Please upload an audio file."
+
+    # 1. Speech-to-text
+    text = speech_to_text(audio)["text"]
+    print(f"User: {text}")
+
+    # 2. Text-to-text (a simple echo bot for now; can be replaced with a more complex model)
+    response_text = f"You said: {text}"
+    print(f"Bot: {response_text}")
+
+    # 3. Text-to-speech
+    speech = text_to_speech(response_text)
+    return speech["audio"], response_text
 
 if __name__ == "__main__":
-    demo.launch()
+    iface = gr.Interface(
+        fn=audio_to_audio_chatbot,
+        inputs=gr.Audio(source="microphone", type="filepath"),
+        outputs=[gr.Audio(), gr.Textbox()],
+        title="Audio-to-Audio Chatbot (Streaming)",
+        description="Speak into the microphone and the bot will reply with audio output.",
+        live=True  # enables streaming
+    )
+
+    iface.launch()
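
Note: the added code likely needs two adjustments to run as written. facebook/fastspeech2-en-ljspeech is a fairseq checkpoint, and as far as I can tell the transformers "text-to-speech" pipeline only loads transformers-native TTS models (e.g. Bark, SpeechT5, VITS). Separately, the pipeline returns a dict, while gr.Audio plays raw numpy audio only when it is given a (sampling_rate, samples) tuple, so returning speech["audio"] alone drops the sampling rate. A minimal sketch of the TTS step under these assumptions, with suno/bark-small as a stand-in model (not the checkpoint from this commit):

import numpy as np
from transformers import pipeline

# Stand-in checkpoint (assumption): a transformers-native TTS model,
# not the fairseq FastSpeech2 checkpoint referenced in this commit.
text_to_speech = pipeline("text-to-speech", model="suno/bark-small")

def speak(response_text):
    speech = text_to_speech(response_text)
    # The pipeline returns {"audio": np.ndarray, "sampling_rate": int};
    # gr.Audio expects raw numpy audio as a (sampling_rate, samples) tuple.
    audio = np.squeeze(speech["audio"])
    return (speech["sampling_rate"], audio), response_text

Depending on the installed Gradio version, gr.Audio(source="microphone", ...) may also need to become gr.Audio(sources=["microphone"], ...), since the keyword was renamed in Gradio 4.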