Reality123b committed
Commit fab81c0 · verified · 1 Parent(s): 1c0f709

Update app.py

Files changed (1)
  1. app.py +520 -63
app.py CHANGED
@@ -11,6 +11,10 @@ import torch
 import numpy as np
 import networkx as nx
 from collections import Counter
+import asyncio
+import edge_tts
+import speech_recognition as sr
+import random

 @dataclass
 class ChatMessage:
@@ -27,13 +31,15 @@ class XylariaChat:
             raise ValueError("HuggingFace token not found in environment variables")

         self.client = InferenceClient(
-            model="Qwen/QwQ-32B-Preview",
-            api_key=self.hf_token
+            model="Qwen/Qwen-32B-Preview",
+            token=self.hf_token
         )

         self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
         self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}

+        self.image_gen_client = InferenceClient("black-forest-labs/FLUX.1-schnell", token=self.hf_token)
+
         self.conversation_history = []
         self.persistent_memory = []
         self.memory_embeddings = None
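For orientation, a minimal standalone sketch of the huggingface_hub client setup this hunk switches to; the environment variable name and token value are placeholders, not the app's actual configuration:

import os
from huggingface_hub import InferenceClient

hf_token = os.getenv("HF_TOKEN")  # placeholder variable name for a HuggingFace access token
client = InferenceClient(
    model="Qwen/QwQ-32B-Preview",  # any hosted chat model id can go here
    token=hf_token,                # keyword used after this commit in place of api_key=
)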
@@ -47,7 +53,7 @@ class XylariaChat:
             "bias_detection": 0.0,
             "strategy_adjustment": ""
         }
-
+
         self.internal_state = {
             "emotions": {
                 "valence": 0.5,
@@ -76,7 +82,7 @@ class XylariaChat:
         ]

         self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin. You should think step-by-step """
-
+
         self.causal_rules_db = {
             "rain": ["wet roads", "flooding"],
             "fire": ["heat", "smoke"],
@@ -90,6 +96,11 @@ class XylariaChat:
             "democracy": "government by the people",
             "photosynthesis": "process used by plants to convert light to energy"
         }
+
+        # === Voice Mode Initialization (Start) ===
+        self.voice_mode_active = False
+        self.selected_voice = "en-US-JennyNeural"  # Default voice
+        # === Voice Mode Initialization (End) ===

     def update_internal_state(self, emotion_deltas, cognitive_load_deltas, introspection_delta, engagement_delta):
         for emotion, delta in emotion_deltas.items():
@@ -117,7 +128,7 @@ class XylariaChat:

     def update_belief_system(self, statement, belief_score):
         self.belief_system[statement] = belief_score
-
+
     def dynamic_belief_update(self, user_message):
         sentences = [s.strip() for s in user_message.split('.') if s.strip()]
         sentence_counts = Counter(sentences)
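A toy illustration (not part of the commit) of the Counter tally that dynamic_belief_update builds over the user's sentences:

from collections import Counter

user_message = "The sky is blue. The sky is blue. Rain causes wet roads."
sentences = [s.strip() for s in user_message.split('.') if s.strip()]
sentence_counts = Counter(sentences)
print(sentence_counts.most_common(1))  # [('The sky is blue', 2)] -- repeated statements score higher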
@@ -223,7 +234,7 @@ class XylariaChat:
             return "Current strategy is effective. Continue with the current approach."
         else:
             return " ".join(adjustments)
-
+
     def introspect(self):
         introspection_report = "Introspection Report:\n"
         introspection_report += f" Current Emotional State:\n"
@@ -273,7 +284,7 @@ class XylariaChat:
             response = "I'm feeling quite energized and ready to assist! " + response
         else:
             response = "I'm in a good mood and happy to help. " + response
-
+
         if curiosity > 0.7:
             response += " I'm very curious about this topic, could you tell me more?"
         if frustration > 0.5:
@@ -299,7 +310,7 @@ class XylariaChat:
             if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
                 goal["priority"] = max(goal["priority"] - 0.1, 0.0)
                 goal["progress"] = max(goal["progress"] - 0.2, 0.0)
-
+
         if "learn more" in feedback_lower:
             for goal in self.goals:
                 if goal["goal"] == "Actively learn and adapt from interactions to improve conversational abilities":
@@ -310,7 +321,7 @@ class XylariaChat:
             if goal["goal"] == "Maintain a coherent, engaging, and empathetic conversation flow":
                 goal["priority"] = max(goal["priority"] - 0.1, 0.0)
                 goal["progress"] = max(goal["progress"] - 0.2, 0.0)
-
+
         if self.internal_state["emotions"]["curiosity"] > 0.8:
             for goal in self.goals:
                 if goal["goal"] == "Identify and fill knowledge gaps by seeking external information":
@@ -387,8 +398,8 @@ class XylariaChat:

         try:
             self.client = InferenceClient(
-                model="Qwen/QwQ-32B-Preview",
-                api_key=self.hf_token
+                model="Qwen/Qwen-32B-Preview",
+                token=self.hf_token
             )
         except Exception as e:
             print(f"Error resetting API client: {e}")
@@ -422,6 +433,13 @@ class XylariaChat:
         except Exception as e:
             return f"Error processing image: {str(e)}"

+    def generate_image(self, prompt):
+        try:
+            image = self.image_gen_client.text_to_image(prompt)
+            return image
+        except Exception as e:
+            return f"Error generating image: {e}"
+
     def perform_math_ocr(self, image_path):
         try:
             img = Image.open(image_path)
@@ -429,9 +447,58 @@ class XylariaChat:
             return text.strip()
         except Exception as e:
             return f"Error during Math OCR: {e}"
-
     def get_response(self, user_input, image=None):
         try:
             messages = []

             messages.append(ChatMessage(
@@ -458,7 +525,7 @@ class XylariaChat:
                 role="user",
                 content=user_input
             ).to_dict())
-
+
             entities = []
             relationships = []

@@ -468,19 +535,19 @@ class XylariaChat:
                 extracted_relationships = self.extract_relationships(message['content'])
                 entities.extend(extracted_entities)
                 relationships.extend(extracted_relationships)
-
+
             self.update_knowledge_graph(entities, relationships)
             self.run_metacognitive_layer()
-
+
             for message in messages:
                 if message['role'] == 'user':
                     self.dynamic_belief_update(message['content'])
-
+
             for cause, effects in self.causal_rules_db.items():
                 if any(cause in msg['content'].lower() for msg in messages if msg['role'] == 'user') and any(
                     effect in msg['content'].lower() for msg in messages for effect in effects):
                     self.store_information("Causal Inference", f"It seems {cause} might be related to {', '.join(effects)}.")
-
+
             for concept, generalization in self.concept_generalizations.items():
                 if any(concept in msg['content'].lower() for msg in messages if msg['role'] == 'user'):
                     self.store_information("Inferred Knowledge", f"This reminds me of a general principle: {generalization}.")
@@ -488,28 +555,54 @@ class XylariaChat:
             if self.internal_state["emotions"]["curiosity"] > 0.8 and any("?" in msg['content'] for msg in messages if msg['role'] == 'user'):
                 print("Simulating external knowledge seeking...")
                 self.store_information("External Knowledge", "This is a placeholder for external information I would have found")
-
             self.store_information("User Input", user_input)

             input_tokens = sum(len(msg['content'].split()) for msg in messages)
             max_new_tokens = 16384 - input_tokens - 50

             max_new_tokens = min(max_new_tokens, 10020)
-
-            stream = self.client.chat_completion(
-                messages=messages,
-                model="Qwen/QwQ-32B-Preview",
-                temperature=0.7,
-                max_tokens=max_new_tokens,
-                top_p=0.9,
-                stream=True
-            )

-            return stream
-
         except Exception as e:
             print(f"Detailed error in get_response: {e}")
-            return f"Error generating response: {str(e)}"

     def extract_entities(self, text):
         words = text.split()
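The removed lines above returned the raw stream from InferenceClient.chat_completion; a minimal sketch of how such a stream is produced and consumed (model id, token, and prompt are placeholders):

from huggingface_hub import InferenceClient

client = InferenceClient(model="Qwen/QwQ-32B-Preview", token="hf_...")  # placeholder token
stream = client.chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=256,
    temperature=0.7,
    top_p=0.9,
    stream=True,
)
full_response = ""
for chunk in stream:
    # each chunk carries an incremental delta, which is how streaming_response assembles the reply
    if chunk.choices and chunk.choices[0].delta.content:
        full_response += chunk.choices[0].delta.content
print(full_response)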
@@ -526,7 +619,7 @@ class XylariaChat:
             if words[i].istitle() and words[i+2].istitle():
                 relationships.append((words[i], words[i+1], words[i+2]))
         return relationships
-
+
     def messages_to_prompt(self, messages):
         prompt = ""
         for msg in messages:
@@ -540,14 +633,165 @@ class XylariaChat:
         return prompt

     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
-
             ocr_text = ""
             if math_ocr_image_path:
                 ocr_text = self.perform_math_ocr(math_ocr_image_path)
                 if ocr_text.startswith("Error"):
                     updated_history = chat_history + [[message, ocr_text]]
-                    yield "", updated_history, None, None
                     return
                 else:
                     message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -556,10 +800,10 @@ class XylariaChat:
                 response_stream = self.get_response(message, image_filepath)
             else:
                 response_stream = self.get_response(message)
-
+
             if isinstance(response_stream, str):
                 updated_history = chat_history + [[message, response_stream]]
-                yield "", updated_history, None, None
+                yield updated_history, None, None, None, ""
                 return

             full_response = ""
@@ -570,13 +814,13 @@ class XylariaChat:
                     if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                         chunk_content = chunk.choices[0].delta.content
                         full_response += chunk_content
-
+
                         updated_history[-1][1] = full_response
-                        yield "", updated_history, None, None
+                        yield updated_history, None, None, None, ""
             except Exception as e:
                 print(f"Streaming error: {e}")
                 updated_history[-1][1] = f"Error during response: {e}"
-                yield "", updated_history, None, None
+                yield updated_history, None, None, None, ""
                 return

             full_response = self.adjust_response_based_on_state(full_response)
@@ -609,14 +853,14 @@ class XylariaChat:
             else:
                 emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
                 engagement_delta = 0.05
-
+
             if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
                 emotion_deltas.update({"curiosity": 0.3})
                 cognitive_load_deltas.update({"processing_intensity": 0.1})
                 engagement_delta = 0.2
-
+
             self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
-
+
             self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
             self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())

@@ -624,41 +868,145 @@ class XylariaChat:
                 self.conversation_history = self.conversation_history[-10:]

         custom_css = """
-        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
-        body, .gradio-container {
-            font-family: 'Inter', sans-serif !important;
        }
        .chatbot-container .message {
-            font-family: 'Inter', sans-serif !important;
        }
        .gradio-container input,
        .gradio-container textarea,
        .gradio-container button {
-            font-family: 'Inter', sans-serif !important;
        }
        .image-container {
            display: flex;
            gap: 10px;
-            margin-bottom: 10px;
        }
        .image-upload {
-            border: 1px solid #ccc;
            border-radius: 8px;
-            padding: 10px;
-            background-color: #f8f8f8;
        }
        .image-preview {
-            max-width: 200px;
-            max-height: 200px;
            border-radius: 8px;
        }
        .clear-button {
            display: none;
        }
        .chatbot-container .message {
            opacity: 0;
            animation: fadeIn 0.5s ease-in-out forwards;
        }
        @keyframes fadeIn {
            from {
                opacity: 0;
@@ -669,43 +1017,151 @@ class XylariaChat:
                transform: translateY(0);
            }
        }
        .gr-accordion-button {
            background-color: #f0f0f0 !important;
            border-radius: 8px !important;
-            padding: 10px !important;
            margin-bottom: 10px !important;
            transition: all 0.3s ease !important;
            cursor: pointer !important;
        }
        .gr-accordion-button:hover {
            background-color: #e0e0e0 !important;
-            box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.1) !important;
        }
        .gr-accordion-active .gr-accordion-button {
            background-color: #d0d0d0 !important;
-            box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1) !important;
        }
        .gr-accordion-content {
            transition: max-height 0.3s ease-in-out !important;
            overflow: hidden !important;
            max-height: 0 !important;
        }
        .gr-accordion-active .gr-accordion-content {
            max-height: 500px !important;
        }
        .gr-accordion {
            display: flex;
            flex-direction: column-reverse;
        }
        """

-        with gr.Blocks(theme='soft', css=custom_css) as demo:
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.5 Senoa",
-                    height=500,
                    show_copy_button=True,
                )

                with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
                    with gr.Row(elem_classes="image-container"):
@@ -734,18 +1190,19 @@ class XylariaChat:
                btn = gr.Button("Send", scale=1)

            with gr.Row():
-                clear = gr.Button("Clear Conversation")
+                clear = gr.Button("Clear Conversation", variant="stop")
                clear_memory = gr.Button("Clear Memory")

+            # Pass voice_mode_state and selected_voice to the streaming_response function
            btn.click(
                fn=streaming_response,
-                inputs=[txt, chatbot, img, math_ocr_img],
-                outputs=[txt, chatbot, img, math_ocr_img]
+                inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
+                outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
            )
            txt.submit(
                fn=streaming_response,
-                inputs=[txt, chatbot, img, math_ocr_img],
-                outputs=[txt, chatbot, img, math_ocr_img]
+                inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
+                outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
            )

            clear.click(
 
11
  import numpy as np
12
  import networkx as nx
13
  from collections import Counter
14
+ import asyncio
15
+ import edge_tts
16
+ import speech_recognition as sr
17
+ import random
18
 
19
  @dataclass
20
  class ChatMessage:
 
31
  raise ValueError("HuggingFace token not found in environment variables")
32
 
33
  self.client = InferenceClient(
34
+ model="Qwen/Qwen-32B-Preview",
35
+ token=self.hf_token
36
  )
37
 
38
  self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
39
  self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
40
 
41
+ self.image_gen_client = InferenceClient("black-forest-labs/FLUX.1-schnell", token=self.hf_token)
42
+
43
  self.conversation_history = []
44
  self.persistent_memory = []
45
  self.memory_embeddings = None
 
53
  "bias_detection": 0.0,
54
  "strategy_adjustment": ""
55
  }
56
+
57
  self.internal_state = {
58
  "emotions": {
59
  "valence": 0.5,
 
82
  ]
83
 
84
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin. You should think step-by-step """
85
+
86
  self.causal_rules_db = {
87
  "rain": ["wet roads", "flooding"],
88
  "fire": ["heat", "smoke"],
 
96
  "democracy": "government by the people",
97
  "photosynthesis": "process used by plants to convert light to energy"
98
  }
99
+
100
+ # === Voice Mode Initialization (Start) ===
101
+ self.voice_mode_active = False
102
+ self.selected_voice = "en-US-JennyNeural" # Default voice
103
+ # === Voice Mode Initialization (End) ===
104
 
105
  def update_internal_state(self, emotion_deltas, cognitive_load_deltas, introspection_delta, engagement_delta):
106
  for emotion, delta in emotion_deltas.items():
 
128
 
129
  def update_belief_system(self, statement, belief_score):
130
  self.belief_system[statement] = belief_score
131
+
132
  def dynamic_belief_update(self, user_message):
133
  sentences = [s.strip() for s in user_message.split('.') if s.strip()]
134
  sentence_counts = Counter(sentences)
 
234
  return "Current strategy is effective. Continue with the current approach."
235
  else:
236
  return " ".join(adjustments)
237
+
238
  def introspect(self):
239
  introspection_report = "Introspection Report:\n"
240
  introspection_report += f" Current Emotional State:\n"
 
284
  response = "I'm feeling quite energized and ready to assist! " + response
285
  else:
286
  response = "I'm in a good mood and happy to help. " + response
287
+
288
  if curiosity > 0.7:
289
  response += " I'm very curious about this topic, could you tell me more?"
290
  if frustration > 0.5:
 
310
  if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
311
  goal["priority"] = max(goal["priority"] - 0.1, 0.0)
312
  goal["progress"] = max(goal["progress"] - 0.2, 0.0)
313
+
314
  if "learn more" in feedback_lower:
315
  for goal in self.goals:
316
  if goal["goal"] == "Actively learn and adapt from interactions to improve conversational abilities":
 
321
  if goal["goal"] == "Maintain a coherent, engaging, and empathetic conversation flow":
322
  goal["priority"] = max(goal["priority"] - 0.1, 0.0)
323
  goal["progress"] = max(goal["progress"] - 0.2, 0.0)
324
+
325
  if self.internal_state["emotions"]["curiosity"] > 0.8:
326
  for goal in self.goals:
327
  if goal["goal"] == "Identify and fill knowledge gaps by seeking external information":
 
398
 
399
  try:
400
  self.client = InferenceClient(
401
+ model="Qwen/Qwen-32B-Preview",
402
+ token=self.hf_token
403
  )
404
  except Exception as e:
405
  print(f"Error resetting API client: {e}")
 
        except Exception as e:
            return f"Error processing image: {str(e)}"

+    def generate_image(self, prompt):
+        try:
+            image = self.image_gen_client.text_to_image(prompt)
+            return image
+        except Exception as e:
+            return f"Error generating image: {e}"
+
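A minimal sketch of the text_to_image call that generate_image wraps, assuming the same FLUX.1-schnell endpoint; the token and prompt are placeholders, and the return value is a PIL image:

from huggingface_hub import InferenceClient

image_client = InferenceClient("black-forest-labs/FLUX.1-schnell", token="hf_...")  # placeholder token
image = image_client.text_to_image("a watercolor fox in a misty forest")
image.save("fox.png")  # PIL.Image.Image, so it can be saved or passed straight to gr.Image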
443
  def perform_math_ocr(self, image_path):
444
  try:
445
  img = Image.open(image_path)
 
            return text.strip()
        except Exception as e:
            return f"Error during Math OCR: {e}"
+
+    # === Voice Mode Methods (Start) ===
+    async def speak_text(self, text):
+        if not text:
+            return None, None
+
+        temp_file = "temp_audio.mp3"
+        try:
+            communicator = edge_tts.Communicate(text, self.selected_voice)
+            await communicator.save(temp_file)
+            return temp_file
+        except Exception as e:
+            print(f"Error during text-to-speech: {e}")
+            return None, None
+
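A self-contained sketch of the edge-tts call that speak_text wraps; the voice name and output path are arbitrary examples, not the app's configuration:

import asyncio
import edge_tts

async def say(text, voice="en-US-JennyNeural", path="out.mp3"):
    # Communicate synthesizes the text and save() writes the mp3 to disk
    await edge_tts.Communicate(text, voice).save(path)

asyncio.run(say("Hello from Xylaria"))
# the voice list used by the UI below comes from: asyncio.run(edge_tts.list_voices())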
+    def recognize_speech(self, timeout=10, phrase_time_limit=10):
+        recognizer = sr.Recognizer()
+        recognizer.energy_threshold = 4000
+        recognizer.dynamic_energy_threshold = True
+
+        with sr.Microphone() as source:
+            print("Listening...")
+            try:
+                audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
+                print("Processing speech...")
+                text = recognizer.recognize_whisper_api(audio_data, api_key=self.hf_token)
+                print(f"Recognized: {text}")
+                return text
+            except sr.WaitTimeoutError:
+                print("No speech detected within the timeout period.")
+                return ""
+            except sr.UnknownValueError:
+                print("Speech recognition could not understand audio")
+                return ""
+            except sr.RequestError as e:
+                print(f"Could not request results from Whisper API; {e}")
+                return ""
+            except Exception as e:
+                print(f"An error occurred during speech recognition: {e}")
+                return ""
+    # === Voice Mode Methods (End) ===
+
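For comparison, a minimal SpeechRecognition sketch along the lines of recognize_speech. Note that recognize_whisper_api above is the OpenAI-hosted Whisper backend and expects an OpenAI API key, so this sketch uses the keyless recognize_google backend as an assumption, not what the commit ships:

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print("Listening...")
    audio = recognizer.listen(source, timeout=10, phrase_time_limit=10)
try:
    print(recognizer.recognize_google(audio))  # free web-API backend, no key required
except sr.UnknownValueError:
    print("Could not understand audio")
except sr.RequestError as e:
    print(f"Recognition request failed: {e}")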
492
  def get_response(self, user_input, image=None):
493
  try:
494
+ # === Voice Mode Adaptation (Start) ===
495
+ if self.voice_mode_active:
496
+ print("Voice mode is active, using speech recognition.")
497
+ user_input = self.recognize_speech() # Get input from speech
498
+ if not user_input:
499
+ return "I didn't hear anything." , None
500
+ # === Voice Mode Adaptation (End) ===
501
+
502
  messages = []
503
 
504
  messages.append(ChatMessage(
 
525
  role="user",
526
  content=user_input
527
  ).to_dict())
528
+
529
  entities = []
530
  relationships = []
531
 
 
535
  extracted_relationships = self.extract_relationships(message['content'])
536
  entities.extend(extracted_entities)
537
  relationships.extend(extracted_relationships)
538
+
539
  self.update_knowledge_graph(entities, relationships)
540
  self.run_metacognitive_layer()
541
+
542
  for message in messages:
543
  if message['role'] == 'user':
544
  self.dynamic_belief_update(message['content'])
545
+
546
  for cause, effects in self.causal_rules_db.items():
547
  if any(cause in msg['content'].lower() for msg in messages if msg['role'] == 'user') and any(
548
  effect in msg['content'].lower() for msg in messages for effect in effects):
549
  self.store_information("Causal Inference", f"It seems {cause} might be related to {', '.join(effects)}.")
550
+
551
  for concept, generalization in self.concept_generalizations.items():
552
  if any(concept in msg['content'].lower() for msg in messages if msg['role'] == 'user'):
553
  self.store_information("Inferred Knowledge", f"This reminds me of a general principle: {generalization}.")
 
555
  if self.internal_state["emotions"]["curiosity"] > 0.8 and any("?" in msg['content'] for msg in messages if msg['role'] == 'user'):
556
  print("Simulating external knowledge seeking...")
557
  self.store_information("External Knowledge", "This is a placeholder for external information I would have found")
558
+
559
  self.store_information("User Input", user_input)
560
 
561
  input_tokens = sum(len(msg['content'].split()) for msg in messages)
562
  max_new_tokens = 16384 - input_tokens - 50
563
 
564
  max_new_tokens = min(max_new_tokens, 10020)
 
 
 
 
 
 
 
 
 
565
 
566
+ # === Voice Mode Output (Start) ===
567
+ if self.voice_mode_active:
568
+ stream = self.client.chat_completion(
569
+ messages=messages,
570
+ model="Qwen/Qwen-32B-Preview",
571
+ temperature=0.7,
572
+ max_tokens=max_new_tokens,
573
+ top_p=0.9,
574
+ stream=True
575
+ )
576
+
577
+ full_response = ""
578
+ for chunk in stream:
579
+ if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
580
+ full_response += chunk.choices[0].delta.content
581
+
582
+ full_response = self.adjust_response_based_on_state(full_response)
583
+ audio_file = asyncio.run(self.speak_text(full_response))
584
+
585
+ # Update conversation history
586
+ self.conversation_history.append(ChatMessage(role="user", content=user_input).to_dict())
587
+ self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
588
+
589
+ return full_response, audio_file
590
+
591
+ # === Voice Mode Output (End) ===
592
+ else:
593
+ stream = self.client.chat_completion(
594
+ messages=messages,
595
+ model="Qwen/Qwen-32B-Preview",
596
+ temperature=0.7,
597
+ max_tokens=max_new_tokens,
598
+ top_p=0.9,
599
+ stream=True
600
+ )
601
+
602
+ return stream
603
  except Exception as e:
604
  print(f"Detailed error in get_response: {e}")
605
+ return f"Error generating response: {str(e)}", None
606
 
607
  def extract_entities(self, text):
608
  words = text.split()
 
619
  if words[i].istitle() and words[i+2].istitle():
620
  relationships.append((words[i], words[i+1], words[i+2]))
621
  return relationships
622
+
623
  def messages_to_prompt(self, messages):
624
  prompt = ""
625
  for msg in messages:
 
633
  return prompt
634
 
635
  def create_interface(self):
636
+ # === Voice-Specific UI Elements (Start) ===
637
+ def toggle_voice_mode(active_state):
638
+ self.voice_mode_active = active_state
639
+ if self.voice_mode_active:
640
+ # Get the list of available voices
641
+ voices = asyncio.run(edge_tts.list_voices())
642
+ voice_names = [voice['ShortName'] for voice in voices]
643
+
644
+ # Select a random voice from the list
645
+ random_voice = random.choice(voice_names)
646
+ self.selected_voice = random_voice
647
+
648
+ return gr.Button.update(value="Stop Voice Mode"), gr.Dropdown.update(value=random_voice)
649
+ else:
650
+ return gr.Button.update(value="Start Voice Mode"), gr.Dropdown.update(value=self.selected_voice)
651
+
652
+ def update_selected_voice(voice_name):
653
+ self.selected_voice = voice_name
654
+ return voice_name
655
+
656
+ # === Voice-Specific UI Elements (End) ===
657
+
658
+ def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, voice_mode_state, selected_voice):
659
+ if self.voice_mode_active:
660
+ response_text, audio_output = self.get_response(message)
661
+
662
+ if isinstance(response_text, str):
663
+ updated_history = chat_history + [[message, response_text]]
664
+ if audio_output:
665
+ yield updated_history, audio_output, None, None, ""
666
+ else:
667
+ yield updated_history, None, None, None, ""
668
+ else:
669
+ full_response = ""
670
+ updated_history = chat_history + [[message, ""]]
671
+ try:
672
+ for chunk in response_text:
673
+ if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
674
+ chunk_content = chunk.choices[0].delta.content
675
+ full_response += chunk_content
676
+ updated_history[-1][1] = full_response
677
+ if audio_output:
678
+ yield updated_history, audio_output, None, None, ""
679
+ else:
680
+ yield updated_history, None, None, None, ""
681
+ except Exception as e:
682
+ print(f"Streaming error: {e}")
683
+ updated_history[-1][1] = f"Error during response: {e}"
684
+ if audio_output:
685
+ yield updated_history, audio_output, None, None, ""
686
+ else:
687
+ yield updated_history, None, None, None, ""
688
+ return
689
+
690
+ full_response = self.adjust_response_based_on_state(full_response)
691
+
692
+ audio_file = asyncio.run(self.speak_text(full_response))
693
+
694
+ self.update_goals(message)
695
+
696
+ emotion_deltas = {}
697
+ cognitive_load_deltas = {}
698
+ engagement_delta = 0
699
+
700
+ if any(word in message.lower() for word in ["sad", "unhappy", "depressed", "down"]):
701
+ emotion_deltas.update({"valence": -0.2, "arousal": 0.1, "confidence": -0.1, "sadness": 0.3, "joy": -0.2})
702
+ engagement_delta = -0.1
703
+ elif any(word in message.lower() for word in ["happy", "good", "great", "excited", "amazing"]):
704
+ emotion_deltas.update({"valence": 0.2, "arousal": 0.2, "confidence": 0.1, "sadness": -0.2, "joy": 0.3})
705
+ engagement_delta = 0.2
706
+ elif any(word in message.lower() for word in ["angry", "mad", "furious", "frustrated"]):
707
+ emotion_deltas.update({"valence": -0.3, "arousal": 0.3, "dominance": -0.2, "frustration": 0.2, "sadness": 0.1, "joy": -0.1})
708
+ engagement_delta = -0.2
709
+ elif any(word in message.lower() for word in ["scared", "afraid", "fearful", "anxious"]):
710
+ emotion_deltas.update({"valence": -0.2, "arousal": 0.4, "dominance": -0.3, "confidence": -0.2, "sadness": 0.2})
711
+ engagement_delta = -0.1
712
+ elif any(word in message.lower() for word in ["surprise", "amazed", "astonished"]):
713
+ emotion_deltas.update({"valence": 0.1, "arousal": 0.5, "dominance": 0.1, "curiosity": 0.3, "sadness": -0.1, "joy": 0.1})
714
+ engagement_delta = 0.3
715
+ elif any(word in message.lower() for word in ["confused", "uncertain", "unsure"]):
716
+ cognitive_load_deltas.update({"processing_intensity": 0.2})
717
+ emotion_deltas.update({"curiosity": 0.2, "confidence": -0.1, "sadness": 0.1})
718
+ engagement_delta = 0.1
719
+ else:
720
+ emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
721
+ engagement_delta = 0.05
722
+
723
+ if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
724
+ emotion_deltas.update({"curiosity": 0.3})
725
+ cognitive_load_deltas.update({"processing_intensity": 0.1})
726
+ engagement_delta = 0.2
727
+
728
+ self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
729
+
730
+ self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
731
+ self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
732
+
733
+ if len(self.conversation_history) > 10:
734
+ self.conversation_history = self.conversation_history[-10:]
735
+
736
+ if audio_file:
737
+ yield updated_history, audio_file, None, None, ""
738
+ else:
739
+ yield updated_history, None, None, None, ""
740
+
741
+ # Handling /image command for image generation
742
+ if "/image" in message:
743
+ image_prompt = message.replace("/image", "").strip()
744
+
745
+ # Updated placeholder SVG with animation and text
746
+ placeholder_image = "data:image/svg+xml," + requests.utils.quote(f'''
747
+ <svg width="256" height="256" viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg">
748
+ <style>
749
+ rect {{
750
+ animation: fillAnimation 3s ease-in-out infinite;
751
+ }}
752
+ @keyframes fillAnimation {{
753
+ 0% {{ fill: #626262; }}
754
+ 50% {{ fill: #111111; }}
755
+ 100% {{ fill: #626262; }}
756
+ }}
757
+ text {{
758
+ font-family: 'Helvetica Neue', Arial, sans-serif; /* Choose a good font */
759
+ font-weight: 300; /* Slightly lighter font weight */
760
+ text-shadow: 0px 2px 4px rgba(0, 0, 0, 0.4); /* Subtle shadow */
761
+ }}
762
+ </style>
763
+ <rect width="256" height="256" rx="20" fill="#888888" />
764
+ <text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" font-size="24" fill="white" opacity="0.8">
765
+ <tspan>creating your image</tspan>
766
+ <tspan x="50%" dy="1.2em">with xylaria iris</tspan>
767
+ </text>
768
+ </svg>
769
+ ''')
770
+
771
+ updated_history = chat_history + [[message, gr.Image(value=placeholder_image, type="pil", visible=True)]]
772
+ yield updated_history, None, None, None, ""
773
+
774
+ try:
775
+ generated_image = self.generate_image(image_prompt)
776
+
777
+ updated_history[-1][1] = gr.Image(value=generated_image, type="pil", visible=True)
778
+ yield updated_history, None, None, None, ""
779
+
780
+ self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
781
+ self.conversation_history.append(ChatMessage(role="assistant", content="Image generated").to_dict())
782
+
783
+ return
784
+ except Exception as e:
785
+ updated_history[-1][1] = f"Error generating image: {e}"
786
+ yield updated_history, None, None, None, ""
787
+ return
788
+
789
  ocr_text = ""
790
  if math_ocr_image_path:
791
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
792
  if ocr_text.startswith("Error"):
793
  updated_history = chat_history + [[message, ocr_text]]
794
+ yield updated_history, None, None, None, ""
795
  return
796
  else:
797
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
 
800
  response_stream = self.get_response(message, image_filepath)
801
  else:
802
  response_stream = self.get_response(message)
803
+
804
  if isinstance(response_stream, str):
805
  updated_history = chat_history + [[message, response_stream]]
806
+ yield updated_history, None, None, None, ""
807
  return
808
 
809
  full_response = ""
 
814
  if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
815
  chunk_content = chunk.choices[0].delta.content
816
  full_response += chunk_content
817
+
818
  updated_history[-1][1] = full_response
819
+ yield updated_history, None, None, None, ""
820
  except Exception as e:
821
  print(f"Streaming error: {e}")
822
  updated_history[-1][1] = f"Error during response: {e}"
823
+ yield updated_history, None, None, None, ""
824
  return
825
 
826
  full_response = self.adjust_response_based_on_state(full_response)
 
853
  else:
854
  emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
855
  engagement_delta = 0.05
856
+
857
  if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
858
  emotion_deltas.update({"curiosity": 0.3})
859
  cognitive_load_deltas.update({"processing_intensity": 0.1})
860
  engagement_delta = 0.2
861
+
862
  self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
863
+
864
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
865
  self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
866
 
 
868
  self.conversation_history = self.conversation_history[-10:]
869
 
870
  custom_css = """
871
+ @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap');
872
+
873
+ body {
874
+ background-color: #f5f5f5;
875
+ font-family: 'Source Sans Pro', sans-serif;
876
+ }
877
+
878
+ .voice-mode-button {
879
+ background-color: #4CAF50; /* Green */
880
+ border: none;
881
+ color: white;
882
+ padding: 15px 32px;
883
+ text-align: center;
884
+ text-decoration: none;
885
+ display: inline-block;
886
+ font-size: 16px;
887
+ margin: 4px 2px;
888
+ cursor: pointer;
889
+ border-radius: 10px; /* Rounded corners */
890
+ transition: all 0.3s ease; /* Smooth transition for hover effect */
891
+ }
892
+
893
+ /* Style when voice mode is active */
894
+ .voice-mode-button.active {
895
+ background-color: #f44336; /* Red */
896
+ }
897
+
898
+ /* Hover effect */
899
+ .voice-mode-button:hover {
900
+ opacity: 0.8;
901
+ }
902
+
903
+ /* Style for the voice mode overlay */
904
+ .voice-mode-overlay {
905
+ position: fixed; /* Stay in place */
906
+ left: 0;
907
+ top: 0;
908
+ width: 100%; /* Full width */
909
+ height: 100%; /* Full height */
910
+ background-color: rgba(0, 0, 0, 0.7); /* Black w/ opacity */
911
+ z-index: 10; /* Sit on top */
912
+ display: flex;
913
+ justify-content: center;
914
+ align-items: center;
915
+ border-radius: 10px;
916
+ }
917
+
918
+ /* Style for the growing circle */
919
+ .voice-mode-circle {
920
+ width: 100px;
921
+ height: 100px;
922
+ background-color: #4CAF50;
923
+ border-radius: 50%;
924
+ display: flex;
925
+ justify-content: center;
926
+ align-items: center;
927
+ animation: grow 2s infinite;
928
  }
929
+
930
+ /* Keyframes for the growing animation */
931
+ @keyframes grow {
932
+ 0% {
933
+ transform: scale(1);
934
+ opacity: 0.8;
935
+ }
936
+ 50% {
937
+ transform: scale(1.5);
938
+ opacity: 0.5;
939
+ }
940
+ 100% {
941
+ transform: scale(1);
942
+ opacity: 0.8;
943
+ }
944
+ }
945
+
946
+ .gradio-container {
947
+ max-width: 900px;
948
+ margin: 0 auto;
949
+ border-radius: 10px;
950
+ box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
951
+ }
952
+
953
+ .chatbot-container {
954
+ background-color: #fff;
955
+ border-radius: 10px;
956
+ padding: 20px;
957
+ }
958
+
959
  .chatbot-container .message {
960
+ font-family: 'Source Sans Pro', sans-serif;
961
+ font-size: 16px;
962
+ line-height: 1.6;
963
  }
964
+
965
  .gradio-container input,
966
  .gradio-container textarea,
967
  .gradio-container button {
968
+ font-family: 'Source Sans Pro', sans-serif;
969
+ font-size: 16px;
970
+ border-radius: 8px;
971
  }
972
+
973
  .image-container {
974
  display: flex;
975
  gap: 10px;
976
+ margin-bottom: 20px;
977
+ justify-content: center;
978
  }
979
+
980
  .image-upload {
981
+ border: 2px dashed #d3d3d3;
982
  border-radius: 8px;
983
+ padding: 20px;
984
+ background-color: #fafafa;
985
+ text-align: center;
986
+ transition: all 0.3s ease;
987
+ }
988
+
989
+ .image-upload:hover {
990
+ background-color: #f0f0f0;
991
+ border-color: #b3b3b3;
992
  }
993
+
994
  .image-preview {
995
+ max-width: 150px;
996
+ max-height: 150px;
997
  border-radius: 8px;
998
+ box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.1);
999
  }
1000
+
1001
  .clear-button {
1002
  display: none;
1003
  }
1004
+
1005
  .chatbot-container .message {
1006
  opacity: 0;
1007
  animation: fadeIn 0.5s ease-in-out forwards;
1008
  }
1009
+
1010
  @keyframes fadeIn {
1011
  from {
1012
  opacity: 0;
 
1017
  transform: translateY(0);
1018
  }
1019
  }
1020
+
1021
  .gr-accordion-button {
1022
  background-color: #f0f0f0 !important;
1023
  border-radius: 8px !important;
1024
+ padding: 15px !important;
1025
  margin-bottom: 10px !important;
1026
  transition: all 0.3s ease !important;
1027
  cursor: pointer !important;
1028
+ border: none !important;
1029
+ box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.05) !important;
1030
  }
1031
+
1032
  .gr-accordion-button:hover {
1033
  background-color: #e0e0e0 !important;
1034
+ box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1) !important;
1035
  }
1036
+
1037
  .gr-accordion-active .gr-accordion-button {
1038
  background-color: #d0d0d0 !important;
1039
+ box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1) !important;
1040
  }
1041
+
1042
  .gr-accordion-content {
1043
  transition: max-height 0.3s ease-in-out !important;
1044
  overflow: hidden !important;
1045
  max-height: 0 !important;
1046
  }
1047
+
1048
  .gr-accordion-active .gr-accordion-content {
1049
  max-height: 500px !important;
1050
  }
1051
+
1052
  .gr-accordion {
1053
  display: flex;
1054
  flex-direction: column-reverse;
1055
  }
1056
+
1057
+ .chatbot-icon {
1058
+ width: 40px;
1059
+ height: 40px;
1060
+ border-radius: 50%;
1061
+ margin-right: 10px;
1062
+ }
1063
+
1064
+ .user-message .message-row {
1065
+ background-color: #e8f0fe;
1066
+ border-radius: 10px;
1067
+ padding: 10px;
1068
+ margin-bottom: 10px;
1069
+ border-top-right-radius: 2px;
1070
+ }
1071
+
1072
+ .assistant-message .message-row {
1073
+ background-color: #f0f0f0;
1074
+ border-radius: 10px;
1075
+ padding: 10px;
1076
+ margin-bottom: 10px;
1077
+ border-top-left-radius: 2px;
1078
+ }
1079
+
1080
+ .user-message .message-icon {
1081
+ background: url('https://img.icons8.com/color/48/000000/user.png') no-repeat center center;
1082
+ background-size: contain;
1083
+ width: 30px;
1084
+ height: 30px;
1085
+ margin-right: 10px;
1086
+ }
1087
+
1088
+ .assistant-message .message-icon {
1089
+ background: url('https://i.ibb.co/7b7hLGH/Senoa-Icon-1.png') no-repeat center center;
1090
+ background-size: cover;
1091
+ width: 40px;
1092
+ height: 40px;
1093
+ margin-right: 10px;
1094
+ border-radius: 50%;
1095
+ }
1096
+
1097
+ .message-text {
1098
+ flex-grow: 1;
1099
+ }
1100
+
1101
+ .message-row {
1102
+ display: flex;
1103
+ align-items: center;
1104
+ }
1105
+
1106
+ .audio-container {
1107
+ display: flex;
1108
+ align-items: center;
1109
+ margin-top: 10px;
1110
+ }
1111
+
1112
+ .audio-player {
1113
+ width: 100%;
1114
+ border-radius: 15px;
1115
+ }
1116
+
1117
+ .audio-icon {
1118
+ width: 30px;
1119
+ height: 30px;
1120
+ margin-right: 10px;
1121
+ }
1122
  """
1123
 
1124
+ with gr.Blocks(theme=gr.themes.Soft(
1125
+ primary_hue="slate",
1126
+ secondary_hue="gray",
1127
+ neutral_hue="gray",
1128
+ font=["Source Sans Pro", "Arial", "sans-serif"],
1129
+ ), css=custom_css) as demo:
1130
  with gr.Column():
1131
  chatbot = gr.Chatbot(
1132
  label="Xylaria 1.5 Senoa",
1133
+ height=600,
1134
  show_copy_button=True,
1135
+ elem_classes="chatbot-container",
1136
+ avatar_images=(
1137
+ "https://img.icons8.com/color/48/000000/user.png", # User avatar
1138
+ "https://i.ibb.co/7b7hLGH/Senoa-Icon-1.png" # Bot avatar
1139
+ )
1140
+ )
1141
+
1142
+ # === Voice Mode UI (Start) ===
1143
+ voice_mode_btn = gr.Button("Start Voice Mode", elem_classes="voice-mode-button")
1144
+
1145
+ voices = asyncio.run(edge_tts.list_voices())
1146
+ voice_names = [voice['ShortName'] for voice in voices]
1147
+
1148
+ voice_dropdown = gr.Dropdown(
1149
+ label="Select Voice",
1150
+ choices=voice_names,
1151
+ value=self.selected_voice,
1152
+ interactive=True
1153
+ )
1154
+ voice_dropdown.input(
1155
+ fn=update_selected_voice,
1156
+ inputs=voice_dropdown,
1157
+ outputs=voice_dropdown
1158
+ )
1159
+ voice_mode_btn.click(
1160
+ fn=toggle_voice_mode,
1161
+ inputs=voice_mode_btn,
1162
+ outputs=[voice_mode_btn, voice_dropdown]
1163
  )
1164
+ # === Voice Mode UI (End) ===
1165
 
1166
  with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
1167
  with gr.Row(elem_classes="image-container"):
 
1190
  btn = gr.Button("Send", scale=1)
1191
 
1192
  with gr.Row():
1193
+ clear = gr.Button("Clear Conversation", variant="stop")
1194
  clear_memory = gr.Button("Clear Memory")
1195
 
1196
+ # Pass voice_mode_state and selected_voice to the streaming_response function
1197
  btn.click(
1198
  fn=streaming_response,
1199
+ inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
1200
+ outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
1201
  )
1202
  txt.submit(
1203
  fn=streaming_response,
1204
+ inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
1205
+ outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
1206
  )
1207
 
1208
  clear.click(