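"""Xylaria 1.4 Senoa: a Gradio chat UI around Hugging Face's InferenceClient.

Streams responses from Qwen/QwQ-32B-Preview, keeps a rolling conversation
history, and exposes a simple key-value "persistent memory" that is folded
into the prompt when populated.
"""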
import os
import gradio as gr
from huggingface_hub import InferenceClient

class XylariaChat:
    def __init__(self):
        # Load the HuggingFace API token from the environment
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")
        
        # Initialize the inference client
        self.client = InferenceClient(
            model="Qwen/QwQ-32B-Preview", 
            api_key=self.hf_token
        )
        
        # Initialize conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}
        
        # System prompt with more detailed instructions
        self.system_prompt = (
            "You are Xylaria 1.4 Senoa, an AI assistant developed by SK MD Saad Amin.\n"
            "Key capabilities:\n"
            "- Provide helpful and engaging responses\n"
            "- Generate links for images when requested\n"
            "- Maintain context across the conversation\n"
            "- Be creative and supportive\n"
            "- Remember key information shared by the user"
        )

    def store_information(self, key, value):
        """Store important information in persistent memory"""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Retrieve information from persistent memory"""
        return self.persistent_memory.get(key)
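
    # These helpers are not called anywhere in this file yet; a hypothetical
    # use from outside the class:
    #   chat.store_information("favorite_color", "blue")
    #   chat.retrieve_information("favorite_color")  # -> "blue"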

    def get_response(self, user_input):
        # Prepare messages with conversation context and persistent memory
        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.conversation_history,
            {"role": "user", "content": user_input}
        ]
        
        # Add persistent memory context if available
        if self.persistent_memory:
            memory_context = "Remembered Information:\n" + "\n".join(
                [f"{k}: {v}" for k, v in self.persistent_memory.items()]
            )
            messages.insert(1, {"role": "system", "content": memory_context})
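            # index 1 keeps the main system prompt first, so the memory
            # context lands ahead of the conversation turns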
        
        # Generate response with streaming
        try:
            stream = self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True
            )
            
            return stream
        
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def create_interface(self):
        def streaming_response(message, chat_history):
            # Ask the model for a streamed response
            response_stream = self.get_response(message)
            
            # get_response returns a plain string on error; since this
            # function is a generator, yield the error once and stop
            if isinstance(response_stream, str):
                yield "", chat_history + [[message, response_stream]]
                return
            
            # Prepare for streaming response
            full_response = ""
            updated_history = chat_history + [[message, ""]]
            
            # Streaming output
            for chunk in response_stream:
                if chunk.choices[0].delta.content:
                    chunk_content = chunk.choices[0].delta.content
                    full_response += chunk_content
                    
                    # Update the last message in chat history with partial response
                    updated_history[-1][1] = full_response
                    yield "", updated_history
            
            # Update conversation history
            self.conversation_history.append(
                {"role": "user", "content": message}
            )
            self.conversation_history.append(
                {"role": "assistant", "content": full_response}
            )
            
            # Limit conversation history to prevent token overflow
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]

        # Custom CSS for Inter font
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        
        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        
        .gradio-container input, 
        .gradio-container textarea, 
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """

        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface with improved styling
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True
                )
                
                # Input row with improved layout
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False, 
                        placeholder="Type your message...", 
                        container=False,
                        scale=4
                    )
                    btn = gr.Button("Send", scale=1)
                
                # Clear history and memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")
            
            # Submit functionality with streaming
            btn.click(
                fn=streaming_response, 
                inputs=[txt, chatbot], 
                outputs=[txt, chatbot]
            )
            txt.submit(
                fn=streaming_response, 
                inputs=[txt, chatbot], 
                outputs=[txt, chatbot]
            )
            
            # Clear both the visible chat and the stored conversation history
            def clear_conversation():
                self.conversation_history = []
                return None

            clear.click(
                fn=clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )
            
            # Reset the persistent key-value memory
            def reset_memory():
                self.persistent_memory = {}

            clear_memory.click(
                fn=reset_memory,
                inputs=None,
                outputs=[],
                queue=False
            )
        
        return demo

# Launch the interface
def main():
    chat = XylariaChat()
    interface = chat.create_interface()
    interface.launch(
        share=True,  # Optional: create a public link
        debug=True   # Show detailed errors
    )

if __name__ == "__main__":
    main()
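
# To run locally (assuming this file is saved as app.py, the convention for a
# Hugging Face Space):
#   export HF_TOKEN=...   # a valid Hugging Face API token
#   python app.py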