"""Gradio chatbot application.

Executing this module performs the following steps at import time, in order:

1. Downloads every Hugging Face dataset listed in ``datasets`` and writes its
   ``"train"`` split to ``data/<name>.json``.
2. Imports the project-local ``embeddings`` module (the original comment says
   this import runs the embedding script as a side effect).
3. Loads the FAISS index at ``my_embeddings`` and logs how many vectors it
   holds.
4. Builds the ``demo`` chat interface; it is launched only when the module is
   run as a script.
"""

import importlib
import json
import os
import subprocess
import sys
import time

import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient

# faiss must be importable before anything below uses it. The install step
# therefore has to happen *around* the import, not after it: running
# ``pip install`` after a failed ``import faiss`` can never be reached.
try:
    import faiss
except ImportError:
    # Use the running interpreter's pip so the package lands in this
    # environment, and fail loudly if the installation itself fails.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "faiss-cpu"])
    importlib.invalidate_caches()
    import faiss

import numpy as np

# NOTE(review): the emoji prefixes in the log/print strings of this file look
# mis-encoded (UTF-8 bytes decoded with a legacy code page). They are kept
# verbatim here because they are runtime output -- confirm the intended emoji
# and restore them at the source.


def log(message: str) -> None:
    """Print ``message`` to stdout with the application's status prefix."""
    print(f"āœ… {message}")


DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)  # Ensure the output directory exists.

# Short local name -> Hugging Face Hub dataset identifier.
datasets = {
    "sales": "goendalf666/sales-conversations",
    "blended": "blended_skill_talk",
    "dialog": "daily_dialog",
    "multiwoz": "multi_woz_v22",
}


def _save_datasets(sources: dict, out_dir: str) -> None:
    """Download each dataset in ``sources`` and dump its train split to JSON.

    Args:
        sources: Mapping of short name to Hugging Face dataset identifier.
        out_dir: Existing directory that receives one ``<name>.json`` per entry.
    """
    for name, hf_name in sources.items():
        print(f"šŸ“„ Downloading {name} dataset...")
        dataset = load_dataset(hf_name)

        # Only the training split is exported, one plain dict per row.
        data_list = [dict(row) for row in dataset["train"]]

        file_path = os.path.join(out_dir, f"{name}.json")
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data_list, f, indent=2)

        print(f"āœ… {name} dataset saved to {file_path}")


_save_datasets(datasets, DATA_DIR)

# Step 1: run the embedding script by importing it.
log("šŸš€ Running embeddings script...")
import embeddings  # noqa: E402  -- imported here on purpose, after the data exists

# NOTE(review): the import above is synchronous, so this pause only matters if
# ``embeddings`` does its work in the background -- confirm, then drop it.
time.sleep(5)


# Step 2: check the FAISS index.
def check_faiss() -> str:
    """Return a human-readable status line describing the FAISS index.

    Reads the index stored at ``my_embeddings`` and reports its vector count
    and dimension. Any failure while loading is converted into an error
    message instead of being raised, so start-up can continue.
    """
    index_path = "my_embeddings"  # Adjust if needed.
    try:
        index = faiss.read_index(index_path)
    except Exception as e:  # Boundary: report the failure, do not crash start-up.
        return f"āŒ ERROR: Failed to load FAISS index - {e}"

    num_vectors = index.ntotal
    dim = index.d
    if num_vectors > 0:
        return f"šŸ“Š FAISS index contains {num_vectors} vectors.\nāœ… Embedding dimension: {dim}"
    return "āš ļø No embeddings found in FAISS index!"


log("šŸ” Checking FAISS embeddings...")
faiss_status = check_faiss()
log(faiss_status)

# Step 3: initialize the chatbot client.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns; each item is indexed as ``[user, assistant]``
            and falsy entries are skipped.
        system_message: Content of the leading system prompt.
        max_tokens: Upper bound on generated tokens, forwarded to the client.
        temperature: Sampling temperature, forwarded to the client.
        top_p: Nucleus-sampling threshold, forwarded to the client.

    Yields:
        The reply accumulated so far, once per received non-empty token.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # ``chat_completion`` (singular) is the InferenceClient method; the loop
    # variable is named ``chunk`` so it does not shadow the ``message`` argument.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some chunks carry no text (content is None or empty); skip them
        # rather than failing on ``str + None``.
        if not token:
            continue
        response += token
        yield response


# Step 4: build the chatbot interface.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

log("āœ… All systems go! Launching chatbot...")

if __name__ == "__main__":
    demo.launch()