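"""Xylaria 1.4 Senoa: a Gradio chat app that streams responses from
Qwen/QwQ-32B-Preview via the huggingface_hub InferenceClient."""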
import os
import gradio as gr
from huggingface_hub import InferenceClient
class XylariaChat:
    def __init__(self):
        # Securely load HuggingFace token
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")

        # Initialize the inference client
        self.client = InferenceClient(
            model="Qwen/QwQ-32B-Preview",
            api_key=self.hf_token
        )

        # Initialize conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}

        # System prompt with more detailed instructions
        self.system_prompt = """You are Xylaria 1.4 Senoa, an AI assistant developed by SK MD Saad Amin.
Key capabilities:
- Provide helpful and engaging responses
- Generate links for images when requested
- Maintain context across the conversation
- Be creative and supportive
- Remember key information shared by the user"""

    def store_information(self, key, value):
        """Store important information in persistent memory."""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Retrieve information from persistent memory."""
        return self.persistent_memory.get(key)
    def get_response(self, user_input):
        # Prepare messages with conversation context and persistent memory
        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.conversation_history,
            {"role": "user", "content": user_input}
        ]

        # Add persistent memory context if available
        if self.persistent_memory:
            memory_context = "Remembered Information:\n" + "\n".join(
                [f"{k}: {v}" for k, v in self.persistent_memory.items()]
            )
            messages.insert(1, {"role": "system", "content": memory_context})

        # Generate response with streaming
        try:
            stream = self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True
            )
            return stream
        except Exception as e:
            return f"Error generating response: {str(e)}"
    def create_interface(self):
        def streaming_response(message, chat_history):
            response_stream = self.get_response(message)

            # If it's an error string, show it immediately and stop;
            # yielding "" as the first output clears the input textbox
            if isinstance(response_stream, str):
                yield "", chat_history + [[message, response_stream]]
                return

            # Prepare for streaming response
            full_response = ""
            updated_history = chat_history + [[message, ""]]

            # Streaming output
            for chunk in response_stream:
                if chunk.choices[0].delta.content:
                    chunk_content = chunk.choices[0].delta.content
                    full_response += chunk_content
                    # Update the last message in chat history with the partial response
                    updated_history[-1][1] = full_response
                    yield "", updated_history

            # Update conversation history
            self.conversation_history.append(
                {"role": "user", "content": message}
            )
            self.conversation_history.append(
                {"role": "assistant", "content": full_response}
            )

            # Limit conversation history to prevent token overflow
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]
        # Custom CSS for the Inter font
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        .gradio-container input,
        .gradio-container textarea,
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """
        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface with improved styling
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True
                )

                # Input row with improved layout
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message...",
                        container=False,
                        scale=4
                    )
                    btn = gr.Button("Send", scale=1)

                # Clear history and memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")

            # Submit functionality with streaming
            btn.click(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )
            txt.submit(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )
            # Clear the conversation: reset both the stored context and the display
            def clear_conversation():
                self.conversation_history = []
                return None

            clear.click(
                fn=clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )

            # Clear persistent memory
            def clear_persistent_memory():
                self.persistent_memory.clear()

            clear_memory.click(
                fn=clear_persistent_memory,
                inputs=None,
                outputs=[],
                queue=False
            )

        return demo
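# Illustrative only: nothing in the UI calls store_information yet, so
# persistent memory stays empty unless a caller seeds it, e.g.:
#
#   chat = XylariaChat()
#   chat.store_information("favorite_color", "blue")  # hypothetical key/value
#   chat.retrieve_information("favorite_color")       # -> "blue"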
# Launch the interface
def main():
    chat = XylariaChat()
    interface = chat.create_interface()
    interface.launch(
        share=True,  # Optional: create a public link
        debug=True   # Show detailed errors
    )


if __name__ == "__main__":
    main()