TuringsSolutions committed on
Commit 1afc2d5 · verified · 1 Parent(s): bacc85c

Update app.py

Files changed (1)
  1. app.py +95 -105
app.py CHANGED
@@ -1,113 +1,103 @@
 import gradio as gr
-import numpy as np
-import io
-import tempfile
-from pydub import AudioSegment
-from dataclasses import dataclass, field
-
-@dataclass
-class AppState:
-    stream: np.ndarray | None = None
-    sampling_rate: int = 0
-    pause_detected: bool = False
-    stopped: bool = False
-    started_talking: bool = False
-    conversation: list = field(default_factory=list)
-
-# Process audio input and detect pauses
-def process_audio(audio: tuple, state: AppState):
-    if state.stream is None:
-        state.stream = audio[1]
-        state.sampling_rate = audio[0]
     else:
-        state.stream = np.concatenate((state.stream, audio[1]))
-
-    pause_detected = len(state.stream) > state.sampling_rate * 1
-    state.pause_detected = pause_detected
-
-    if state.pause_detected:
-        return gr.Audio(recording=False), state  # Stop recording
-    return None, state
-
-# Generate response based on input type (text or audio)
-def response(input_data, state: AppState, input_type: str):
-    if input_type == "text":
-        # Ensure text input is handled correctly
-        user_message = input_data.strip()  # Prevent errors from empty inputs
-        if not user_message:
-            return "Please enter a valid message.", state
-
-        state.conversation.append({"role": "user", "content": user_message})
-        bot_response = f"Echo: {user_message}"  # Simulated bot response
-        state.conversation.append({"role": "assistant", "content": bot_response})
-        return bot_response, state
-
-    if input_type == "audio" and state.pause_detected:
-        # Convert audio to WAV and store in conversation history
-        audio_buffer = io.BytesIO()
-        segment = AudioSegment(
-            state.stream.tobytes(),
-            frame_rate=state.sampling_rate,
-            sample_width=state.stream.dtype.itemsize,
-            channels=1 if len(state.stream.shape) == 1 else state.stream.shape[1]
-        )
-        segment.export(audio_buffer, format="wav")
-
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-            f.write(audio_buffer.getvalue())
-            state.conversation.append({"role": "user", "content": {"path": f.name, "mime_type": "audio/wav"}})
-
-        chatbot_response = b"Simulated response audio content"
-        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
-            f.write(chatbot_response)
-            state.conversation.append({"role": "assistant", "content": {"path": f.name, "mime_type": "audio/mp3"}})
-
-        yield None, state

-    return None, state  # Handle unexpected input cases gracefully
-
-# Start recording audio input
-def start_recording_user(state: AppState):
-    if not state.stopped:
-        return gr.Audio(recording=True)
-
-# Gradio app setup
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_audio = gr.Audio(label="Input Audio", type="numpy")
-            text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
         with gr.Column():
-            chatbot = gr.Chatbot(label="Conversation", type="messages")
-            output_audio = gr.Audio(label="Output Audio", streaming=True, autoplay=True)
-
-    state = gr.State(value=AppState())
-
-    # Handle audio input streaming
-    stream = input_audio.stream(
-        process_audio, [input_audio, state], [input_audio, state], stream_every=0.5, time_limit=30
-    )
-
-    # Handle text input submission
-    text_submit = text_input.submit(
-        lambda txt, s: response(txt, s, "text"), [text_input, state], [chatbot, state]
-    )
-
-    # Handle audio stop recording
-    respond = input_audio.stop_recording(
-        lambda s: response(None, s, "audio"), [state], [output_audio, state]
-    )
-    respond.then(lambda s: s.conversation, [state], [chatbot])
-
-    # Restart recording after audio playback ends
-    restart = output_audio.stop(start_recording_user, [state], [input_audio])
-
-    # Stop conversation button
-    cancel = gr.Button("Stop Conversation", variant="stop")
-    cancel.click(
-        lambda: (AppState(stopped=True), gr.Audio(recording=False)),
-        None, [state, input_audio], cancels=[respond, restart]
-    )
-
-if __name__ == "__main__":
-    demo.launch()
 import gradio as gr
+from huggingface_hub import InferenceClient
+from gradio_client import Client  # needed below for calling an external Gradio Space
+import json
+import uuid
+from PIL import Image
+from bs4 import BeautifulSoup
+import requests
+import random
+from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
+from threading import Thread
+import re
+import time
+import torch
+
+# Initialize model and processor
+model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
+processor = LlavaProcessor.from_pretrained(model_id)
+model = LlavaForConditionalGeneration.from_pretrained(model_id).to("cpu")
+
+# Initialize inference clients for different models
+client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+client_yi = InferenceClient("01-ai/Yi-1.5-34B-Chat")
+
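+# NB: only client_gemma is actually used in respond() below; the other three
+# clients are initialized but not yet wired to any handler.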
+def search(query):
+    """Performs a Google search and extracts text from the top results."""
+    session = requests.Session()
+    response = session.get(f"https://www.google.com/search?q={query}",
+                           headers={"User-Agent": "Mozilla/5.0"})
+    soup = BeautifulSoup(response.text, "html.parser")
+    results = []
+    for result in soup.find_all("div", class_="BNeawe vvjwJb AP7Wnd"):
+        text = result.get_text()
+        link_tag = result.find_parent("a")
+        if link_tag is None:  # skip results that are not wrapped in a link
+            continue
+        results.append(f"{text}: {link_tag['href']}")
+    return "\n".join(results[:3])
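+# Caveat: this scrapes Google's no-JS results page; the CSS class above is not
+# a stable API and can change without notice. Hypothetical usage:
+#   search("gradio streaming")  # -> up to three "title: /url?q=..." lines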
+
+def llava(inputs, history):
+    """Processes an image and text input with Llava."""
+    image = Image.open(inputs["files"][0]).convert("RGB")
+    # Chat template for llava-interleave-qwen; the trailing assistant tag
+    # cues the model to generate its reply
+    prompt = f"<|im_start|>user <image>\n{inputs['text']}<|im_end|><|im_start|>assistant"
+    processed = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
+    return processed
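+# llava() returns a BatchFeature (input_ids, attention_mask, pixel_values),
+# which respond() unpacks into model.generate(**inputs) below.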
+
+def respond(message, history):
+    """Main response function for the chatbot."""
+    if "files" in message and message["files"]:
+        inputs = llava(message, history)
+        # TextIteratorStreamer needs a tokenizer to decode the streamed tokens
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
+        thread = Thread(target=model.generate, kwargs=dict(**inputs, max_new_tokens=512, streamer=streamer))
+        thread.start()
+        buffer = ""
+        for new_text in streamer:
+            buffer += new_text
+            yield buffer
     else:
+        # Replay past turns with their proper roles before adding the new message
+        prompt = []
+        for user_msg, bot_msg in history:
+            prompt.append({"role": "user", "content": user_msg})
+            if bot_msg:
+                prompt.append({"role": "assistant", "content": bot_msg})
+        prompt.append({"role": "user", "content": message["text"]})
+        response = client_gemma.chat_completion(prompt, max_tokens=200)
+        yield response["choices"][0]["message"]["content"]
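+# respond() is a generator so image replies can stream incrementally; the UI
+# handlers below drain it and keep only the final text for the chat log.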
+
+def generate_image(prompt):
+    """Generates an image by calling an external Gradio Space."""
+    # gradio_client.Client, not InferenceClient, exposes predict(..., api_name=...)
+    client = Client("KingNish/Image-Gen-Pro")
+    return client.predict("Image Generation", None, prompt, api_name="/image_gen_pro")
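+# Assumption: the /image_gen_pro endpoint takes (task, seed, prompt) and returns
+# image data; verify with Client("KingNish/Image-Gen-Pro").view_api() first.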
+
+# Set up Gradio interface
  with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
     with gr.Row():
         with gr.Column():
+            text_input = gr.Textbox(placeholder="Enter your message...")
+            file_input = gr.File(label="Upload an image")
         with gr.Column():
+            output = gr.Image(label="Generated Image")
+    with gr.Row():
+        search_button = gr.Button("Search Google")
+        image_button = gr.Button("Generate Image")
+    # Example prompts (not yet wired to a gr.Examples component)
+    examples = [
+        {"text": "Who are you?"},
+        {"text": "Generate an image of the Eiffel Tower at night."},
+        {"text": "Search for the latest trends on YouTube."},
+    ]
+
+    def handle_text(text, history):
+        """Runs respond() on a text message and appends the turn to the chat."""
+        history = history or []
+        reply = ""
+        for chunk in respond({"text": text}, history):
+            reply = chunk  # keep the last (most complete) chunk
+        history.append((text, reply))
+        return history
+
+    def handle_file_upload(file, history):
+        """Runs respond() on an uploaded image and appends the turn to the chat."""
+        history = history or []
+        if file is None:  # fires when the file selection is cleared
+            return history
+        reply = ""
+        for chunk in respond({"files": [file], "text": "Describe this image."}, history):
+            reply = chunk
+        history.append(("[image uploaded]", reply))
+        return history
+
+    # Connect components to callbacks; the chatbot itself carries the history
+    text_input.submit(handle_text, [text_input, chatbot], [chatbot])
+    file_input.change(handle_file_upload, [file_input, chatbot], [chatbot])
+
+    # Search button: show the query and its results as a chat turn
+    search_button.click(lambda query: [(query, search(query))], [text_input], [chatbot])
+    image_button.click(lambda text: generate_image(text), [text_input], [output])
+
+# Launch the Gradio interface
+demo.launch()
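+# To try this locally (assuming access to the models above; Meta-Llama-3 is
+# gated and needs an accepted license plus HF_TOKEN set in the environment):
+#   pip install gradio transformers torch pillow beautifulsoup4 requests \
+#       huggingface_hub gradio_client
+#   python app.py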