import gradio as gr
import requests
import logging
import json
import os
import numpy as np

# Set up logging to help troubleshoot issues
logging.basicConfig(level=logging.DEBUG)

# LM Studio REST API base URL
BASE_URL = "http://localhost:1234/v1"

# Function to handle chat completions with streaming support
def chat_with_lmstudio(messages):
    url = f"{BASE_URL}/chat/completions"
    payload = {
        "model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf",  # Replace with your chat model
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096,
        "stream": True
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")

    try:
        with requests.post(url, json=payload, stream=True) as response:
            logging.debug(f"Response Status Code: {response.status_code}")
            response.raise_for_status()

            collected_response = ""
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                chunk_data = chunk.decode('utf-8').strip()
                # Strip the SSE "data: " prefix before checking for the end-of-stream marker
                if chunk_data.startswith("data: "):
                    chunk_data = chunk_data[6:].strip()
                if chunk_data == "[DONE]":
                    logging.debug("Received [DONE] signal. Ending stream.")
                    break
                logging.debug(f"Received Chunk: {chunk_data}")
                try:
                    response_data = json.loads(chunk_data)
                    if "choices" in response_data and len(response_data["choices"]) > 0:
                        content = response_data['choices'][0].get('delta', {}).get('content') or ""
                        collected_response += content
                        yield content
                except json.JSONDecodeError:
                    logging.error(f"Failed to decode JSON from chunk: {chunk_data}")

            if not collected_response:
                yield "I'm sorry, I couldn't generate a response. Could you please try again?"
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio failed: {e}")
        yield "An error occurred while connecting to LM Studio. Please try again later."

# Function to get embeddings from LM Studio
def get_embeddings(text):
    url = f"{BASE_URL}/embeddings"
    payload = {
        "model": "nomad_embed_text_v1_5_Q8_0",  # Use the exact model name registered in LM Studio
        "input": text
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")

    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        data = response.json()
        embedding = data['data'][0]['embedding']
        logging.debug(f"Received Embedding: {embedding}")
        return embedding
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio for embeddings failed: {e}")
        return None

# Function to calculate cosine similarity between two embedding vectors
def cosine_similarity(vec1, vec2):
    if not vec1 or not vec2:
        return 0
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
        return 0
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
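# Optional helper (a minimal sketch, not wired into the UI below): LM Studio's
# OpenAI-compatible server exposes GET /v1/models, which can serve as a quick
# reachability check before launching the interface. The function name is
# illustrative; adapt or drop it as needed.
def lmstudio_is_reachable():
    try:
        response = requests.get(f"{BASE_URL}/models", timeout=5)
        response.raise_for_status()
        return True
    except requests.exceptions.RequestException as e:
        logging.error(f"LM Studio is not reachable at {BASE_URL}: {e}")
        return False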
# Gradio Blocks interface for chat with file upload and embeddings
def gradio_chat_interface():
    with gr.Blocks() as iface:
        gr.Markdown("# Chat with LM Studio 🚀")
        gr.Markdown(
            "A chat interface powered by LM Studio. "
            "You can send text messages or upload files (e.g., `.txt`) to include in the conversation."
        )

        chatbot = gr.Chatbot(type='messages')  # Use the 'messages' type to avoid the deprecated tuple format
        state = gr.State([])  # Stores the conversation history as a list of role/content dicts
        embeddings_state = gr.State([])  # Stores (text, embedding) pairs

        with gr.Row():
            with gr.Column(scale=4):
                user_input = gr.Textbox(
                    label="Type your message here",
                    placeholder="Enter text and press enter",
                    lines=1
                )
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload a file",
                    file_types=[".txt"],  # Restrict to text files; modify as needed
                    type="binary"  # Pass the file content to the handler as raw bytes (replaces the deprecated 'file' type)
                )
        send_button = gr.Button("Send")

        # Function to handle chat interactions
        def chat_interface(user_message, uploaded_file, history, embeddings):
            # Initialize history and embeddings if None
            if history is None:
                history = []
            if embeddings is None:
                embeddings = []

            # Process the uploaded file if present
            if uploaded_file is not None:
                try:
                    # With type="binary", Gradio passes the file content as bytes
                    file_content = uploaded_file.decode('utf-8')
                    user_message += f"\n\n[File Content]:\n{file_content}"
                    logging.debug("Processed uploaded file.")

                    # Generate an embedding for the file content
                    file_embedding = get_embeddings(file_content)
                    if file_embedding:
                        embeddings.append((file_content, file_embedding))
                        logging.debug("Stored embedding for uploaded file.")
                except Exception as e:
                    logging.error(f"Error reading uploaded file: {e}")
                    user_message += "\n\n[Error reading the uploaded file.]"

            # Generate an embedding for the user message
            user_embedding = get_embeddings(user_message)
            if user_embedding:
                embeddings.append((user_message, user_embedding))
                logging.debug("Stored embedding for user message.")

            # Retrieve relevant context based on embeddings:
            # pick the top 2 most similar previously embedded texts
            context_messages = []
            if embeddings:
                similarities = []
                for idx, (text, embed) in enumerate(embeddings[:-1]):  # Exclude the current user message
                    sim = cosine_similarity(user_embedding, embed)
                    similarities.append((sim, idx))
                # Sort by similarity, highest first
                similarities.sort(reverse=True, key=lambda x: x[0])
                top_n = 2
                top_indices = [idx for (_, idx) in similarities[:top_n]]
                for idx in top_indices:
                    context_messages.append(embeddings[idx][0])  # Use the stored text as context

            # Append the user message to the history
            history.append({"role": "user", "content": user_message})
            logging.debug(f"Updated History: {history}")

            # Build the message list, prepending any retrieved context
            messages = []
            if context_messages:
                messages.append({"role": "system", "content": "You have the following context:"})
                for ctx in context_messages:
                    messages.append({"role": "user", "content": ctx})
                messages.append({"role": "system", "content": "Use this context to assist the user."})

            # Append all messages from the history
            messages.extend(history)

            # Get the response from LM Studio
            response_stream = chat_with_lmstudio(messages)
            response = ""

            # To handle streaming, append an empty assistant message and update it incrementally
            assistant_message = {"role": "assistant", "content": ""}
            history.append(assistant_message)
            logging.debug(f"Appended empty assistant message: {assistant_message}")

            for chunk in response_stream:
                response += chunk
                # Update the assistant message content
                assistant_message['content'] = response
                logging.debug(f"Updated assistant message: {assistant_message}")
                # Yield the updated chat display, history state, and embeddings
                yield history, history, embeddings
            # Finalize the history with the complete response
            assistant_message['content'] = response
            logging.debug(f"Final assistant message: {assistant_message}")
            yield history, history, embeddings

        # Connect the Send button to the chat function
        send_button.click(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
            queue=True  # Enable queuing to handle multiple requests
        )

        # Also allow pressing Enter in the textbox to send the message
        user_input.submit(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
            queue=True
        )

        # Debug statements to help diagnose file path issues
        logging.debug(f"Current working directory: {os.getcwd()}")
        logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")

    iface.launch(share=True)

# Launch the chat interface
if __name__ == "__main__":
    gradio_chat_interface()
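# Quick standalone sanity check (illustrative only, not executed by this script):
# with the LM Studio server running and an embedding model loaded, the helpers
# above can be exercised directly from a Python shell, e.g.
#
#   >>> a = get_embeddings("hello world")
#   >>> b = get_embeddings("hello there")
#   >>> cosine_similarity(a, b)
#
# Values close to 1.0 indicate semantically similar texts.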