"""Gradio chatbot front-end backed by a Hugging Face inference endpoint.

On start-up the script:

1. kicks off ``embeddings.py`` in a background thread (by importing it),
2. reports the state of the FAISS index file on disk, and
3. builds a ``gr.ChatInterface`` that streams completions from
   ``mistralai/Mistral-7B-Instruct-v0.3``.
"""

import os  # unused in this file; retained from the original import list
import subprocess
import sys
import threading
import time  # unused in this file; retained from the original import list

import gradio as gr
import numpy as np  # unused in this file; retained from the original import list
from huggingface_hub import InferenceClient

try:
    import faiss
except ImportError:
    # Installing must happen *before* the import can succeed; running pip
    # after an unconditional ``import faiss`` could never rescue a missing
    # package. Use the current interpreter so the right environment is hit.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "faiss-cpu"])
    import faiss


def log(message):
    """Print *message* to stdout with a check-mark prefix."""
    print(f"✅ {message}")


# Step 1: run the embeddings script in a separate thread.
def run_embeddings():
    """Run ``embeddings.py`` by importing it, logging start and finish."""
    log("🚀 Running embeddings script in background...")
    # Importing a module executes its top-level code, which is how the
    # embeddings script gets run here.
    import embeddings  # noqa: F401
    log("✅ Embeddings process finished.")


embedding_thread = threading.Thread(target=run_embeddings)
embedding_thread.start()  # start embedding in the background


# Step 2: check the FAISS index.
def check_faiss(index_path="my_embeddings.faiss"):
    """Return a human-readable status string for the FAISS index file.

    Args:
        index_path: Path of the index file to inspect. Defaults to the
            file name this script has always used.

    Returns:
        A message with the vector count and dimension when the index loads,
        a warning when the file does not exist, or an error message when
        reading it fails.
    """
    if not os.path.exists(index_path):
        return "⚠️ No FAISS index found! Embeddings might still be processing."

    try:
        index = faiss.read_index(index_path)
        num_vectors = index.ntotal
        dim = index.d
        return (
            f"📊 FAISS index contains {num_vectors} vectors.\n"
            f"✅ Embedding dimension: {dim}"
        )
    except Exception as e:
        # Best-effort status check: report the failure instead of crashing
        # the app at start-up.
        return f"❌ ERROR: Failed to load FAISS index - {e}"


log("🔍 Checking FAISS embeddings...")
faiss_status = check_faiss()
log(faiss_status)

# Step 3: initialize the chatbot client.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: The new user message.
        history: Previous turns; each entry is indexed as
            ``[user_text, assistant_text]``.
        system_message: System prompt placed first in the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""
    # ``chunk`` (not ``message``) so the user-message parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A streamed delta may carry no text; skip it rather than
        # concatenating None onto the response.
        if token:
            response += token
        yield response


# Step 4: build the chatbot interface.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

log("✅ All systems go! Launching chatbot...")

if __name__ == "__main__":
    demo.launch()