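"""Xylaria 1.4 Senoa: a Gradio chat UI around Hugging Face's InferenceClient.

Streams responses from Qwen/QwQ-32B-Preview, keeps a rolling conversation
history, and exposes a simple key-value "persistent memory" that is folded
into the prompt when populated.
"""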
import os
import gradio as gr
from huggingface_hub import InferenceClient

class XylariaChat:
    def __init__(self):
        # Load the HuggingFace API token from the environment
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")
        
        # Initialize the inference client
        self.client = InferenceClient(
            model="Qwen/QwQ-32B-Preview", 
            api_key=self.hf_token
        )
        
        # Initialize conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}
        
        # System prompt with more detailed instructions
        self.system_prompt = (
            "You are Xylaria 1.4 Senoa, an AI assistant developed by SK MD Saad Amin.\n"
            "Key capabilities:\n"
            "- Provide helpful and engaging responses\n"
            "- Generate links for images when requested\n"
            "- Maintain context across the conversation\n"
            "- Be creative and supportive\n"
            "- Remember key information shared by the user"
        )

    def store_information(self, key, value):
        """Store important information in persistent memory"""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Retrieve information from persistent memory"""
        return self.persistent_memory.get(key)
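
    # These helpers are not called anywhere in this file yet; a hypothetical
    # use from outside the class:
    #   chat.store_information("favorite_color", "blue")
    #   chat.retrieve_information("favorite_color")  # -> "blue"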

    def get_response(self, user_input):
        # Prepare messages with conversation context and persistent memory
        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.conversation_history,
            {"role": "user", "content": user_input}
        ]
        
        # Add persistent memory context if available
        if self.persistent_memory:
            memory_context = "Remembered Information:\n" + "\n".join(
                [f"{k}: {v}" for k, v in self.persistent_memory.items()]
            )
            messages.insert(1, {"role": "system", "content": memory_context})
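            # index 1 keeps the main system prompt first, so the memory
            # context lands ahead of the conversation turns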
        
        # Generate response with streaming
        try:
            stream = self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True
            )
            
            return stream
        
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def create_interface(self):
        def streaming_response(message, chat_history):
            # Ask the model for a streamed response
            response_stream = self.get_response(message)
            
            # get_response returns a plain string on error; since this
            # function is a generator, yield the error once and stop
            if isinstance(response_stream, str):
                yield "", chat_history + [[message, response_stream]]
                return
            
            # Prepare for streaming response
            full_response = ""
            updated_history = chat_history + [[message, ""]]
            
            # Streaming output
            for chunk in response_stream:
                if chunk.choices[0].delta.content:
                    chunk_content = chunk.choices[0].delta.content
                    full_response += chunk_content
                    
                    # Update the last message in chat history with partial response
                    updated_history[-1][1] = full_response
                    yield "", updated_history
            
            # Update conversation history
            self.conversation_history.append(
                {"role": "user", "content": message}
            )
            self.conversation_history.append(
                {"role": "assistant", "content": full_response}
            )
            
            # Limit conversation history to prevent token overflow
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]

        # Custom CSS for Inter font
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        
        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        
        .gradio-container input, 
        .gradio-container textarea, 
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """

        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface with improved styling
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True
                )
                
                # Input row with improved layout
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False, 
                        placeholder="Type your message...", 
                        container=False,
                        scale=4
                    )
                    btn = gr.Button("Send", scale=1)
                
                # Clear history and memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")
            
            # Submit functionality with streaming
            btn.click(
                fn=streaming_response, 
                inputs=[txt, chatbot], 
                outputs=[txt, chatbot]
            )
            txt.submit(
                fn=streaming_response, 
                inputs=[txt, chatbot], 
                outputs=[txt, chatbot]
            )
            
            # Clear both the visible chat and the stored conversation history
            def clear_conversation():
                self.conversation_history = []
                return None

            clear.click(
                fn=clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )
            
            # Reset the persistent key-value memory
            def reset_memory():
                self.persistent_memory = {}

            clear_memory.click(
                fn=reset_memory,
                inputs=None,
                outputs=[],
                queue=False
            )
        
        return demo

# Launch the interface
def main():
    chat = XylariaChat()
    interface = chat.create_interface()
    interface.launch(
        share=True,  # Optional: create a public link
        debug=True   # Show detailed errors
    )

if __name__ == "__main__":
    main()
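
# To run locally (assuming this file is saved as app.py, the convention for a
# Hugging Face Space):
#   export HF_TOKEN=...   # a valid Hugging Face API token
#   python app.py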