"""Gradio app with a streaming chatbot, an embedding trigger and a FAISS index check."""

import os
import subprocess
import sys
import time

import gradio as gr
import numpy as np
from datasets import load_dataset
from huggingface_hub import InferenceClient

# ✅ Install FAISS if missing.
# The import is attempted first so the install only runs when it is needed;
# installing *after* an unconditional `import faiss` could never help.
try:
    import faiss
except ImportError:
    # Use the running interpreter's pip so the package lands in this environment.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "faiss-cpu"],
        check=False,
    )
    import faiss  # Raises ImportError if the install did not succeed.

# Default location of the FAISS index file. Adjust if needed.
FAISS_INDEX_PATH = "my_embeddings"


def log(message):
    """Print *message* to stdout, prefixed with a check mark."""
    print(f"✅ {message}")


# ✅ Load the datasets
datasets = {
    "sales": load_dataset("goendalf666/sales-conversations", trust_remote_code=True),
    "blended": load_dataset("blended_skill_talk", trust_remote_code=True),
    "dialog": load_dataset("daily_dialog", trust_remote_code=True),
    "multiwoz": load_dataset("multi_woz_v22", trust_remote_code=True),
}

# Optional: Print dataset names and sizes
for name, dataset in datasets.items():
    print(f"{name}: {len(dataset['train'])} examples")

# Initialize the model client (use correct model for chatbot)
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")


# Chatbot response function
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to *message*.

    Args:
        message: The latest user message.
        history: Earlier turns as (user text, assistant text) pairs; empty
            entries in a pair are skipped.
        system_message: Content of the system prompt placed first.
        max_tokens: Forwarded to the chat completion call.
        temperature: Forwarded to the chat completion call.
        top_p: Forwarded to the chat completion call.

    Yields:
        The reply text accumulated so far, once per streamed token.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # `chunk` (not `message`) so the loop does not shadow the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A chunk may carry no text; concatenating None would raise TypeError.
        if token:
            response += token
            yield response


# Gradio interface for chatbot
chat_demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


def start_embedding():
    """Run the (currently simulated) embedding job.

    Returns:
        A status string for the interface's text output.
    """
    # Include your embedding logic here (from embeddings.py)
    log("Embedding started...")
    time.sleep(2)  # Simulating embedding process
    log("Embedding process finished.")
    return "Embedding process finished."


# Create Gradio interface with a button to start the embedding.
# There are no inputs, so `live` would have nothing to react to and is omitted.
embedding_demo = gr.Interface(
    fn=start_embedding,
    inputs=None,
    outputs="text",
    title="Embedding Trigger",
)


# ✅ Function to check FAISS index
def check_faiss(index_path=FAISS_INDEX_PATH):
    """Load a FAISS index and summarize its contents.

    Args:
        index_path: Path of the index file to read.

    Returns:
        A human-readable status string: vector count, dimension and a small
        sample on success, a warning when the index is empty, or an error
        message when loading or reading the index fails.
    """
    try:
        index = faiss.read_index(index_path)
        num_vectors = index.ntotal
        dim = index.d

        if num_vectors <= 0:
            return "⚠️ No embeddings found in FAISS index!"

        # Get first 5 embeddings (fewer if the index is smaller).
        sample_vectors = index.reconstruct_n(0, min(5, num_vectors))
        return (
            f"📊 FAISS index contains {num_vectors} vectors.\n"
            f"✅ Embedding dimension: {dim}\n"
            f"🧐 Sample: {sample_vectors[:2]} ..."
        )
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of crashing.
        return f"❌ ERROR: Failed to load FAISS index - {e}"


# ✅ Add a Gradio button to trigger FAISS check
with gr.Blocks() as faiss_demo:
    gr.Markdown("### 🔍 FAISS Embedding Check")
    check_button = gr.Button("🔎 Check FAISS Embeddings")
    output_text = gr.Textbox(label="FAISS Status", interactive=False)

    check_button.click(fn=check_faiss, outputs=output_text)

# Combine the three UIs; previously each one rebound `demo`, so only the last
# was ever launched and the chatbot and embedding trigger were unreachable.
demo = gr.TabbedInterface(
    [chat_demo, embedding_demo, faiss_demo],
    tab_names=["Chatbot", "Embedding Trigger", "FAISS Check"],
)

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()