# app.py — Hugging Face Space chatbot (Gradio + Mistral-7B-Instruct inference).
# NOTE(review): the original paste carried a file-size header plus git-blame and
# line-number gutters here, which were not Python; replaced with this comment.
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
import faiss
import numpy as np
import os
import time
import json
# ✅ Ensure FAISS is installed
# NOTE(review): this runs *after* `import faiss` at the top of the file, so it
# cannot rescue a missing install in the current process — if faiss is absent
# the import above already failed. Prefer listing faiss-cpu in requirements.txt.
os.system("pip install faiss-cpu")
def log(message):
    """Print *message* prefixed with a status marker, for simple console tracing.

    Returns None; output goes to stdout only.
    """
    # Original literal was mojibake split across two lines (syntax error);
    # reconstructed as a single status-prefixed f-string.
    print(f"✅ {message}")
import os
import json
from datasets import load_dataset
# Directory where downloaded dataset JSON dumps are stored.
DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)  # Ensure directory exists

# ✅ List of datasets: local short name -> Hugging Face dataset id
datasets = {
    "sales": "goendalf666/sales-conversations",
    "blended": "blended_skill_talk",
    "dialog": "daily_dialog",
    "multiwoz": "multi_woz_v22",
}
# ✅ Save the train split of each dataset to JSON.
# Improvement: skip datasets whose JSON dump already exists so the Space does
# not re-download everything on every restart.
for name, hf_name in datasets.items():
    file_path = os.path.join(DATA_DIR, f"{name}.json")
    if os.path.exists(file_path):
        print(f"✅ {name} dataset already present at {file_path}, skipping download")
        continue

    print(f"📥 Downloading {name} dataset...")
    dataset = load_dataset(hf_name)

    # Extract the training split and convert each row to a plain dict so it is
    # JSON-serializable.
    train_data = dataset["train"]
    data_list = [dict(row) for row in train_data]

    with open(file_path, "w") as f:
        json.dump(data_list, f, indent=2)

    print(f"✅ {name} dataset saved to {file_path}")
# ✅ Step 1: Run embedding script (importing executes embeddings.py top level)
log("🚀 Running embeddings script...")
import embeddings  # noqa: F401 — side-effect import: builds the FAISS index
# HACK: crude fixed wait for the index files to land on disk; a completion
# signal from embeddings.py (e.g. a function call or sentinel file) would be
# more reliable than sleeping.
time.sleep(5)
# ✅ Step 2: Check FAISS index
def check_faiss():
    """Load the FAISS index from disk and return a human-readable status string.

    Returns a summary with vector count and dimension on success, a warning if
    the index is empty, or an error message if the index cannot be loaded.
    Never raises: all failures are folded into the returned string.
    """
    index_path = "my_embeddings"  # Adjust if embeddings.py writes elsewhere
    try:
        index = faiss.read_index(index_path)
        num_vectors = index.ntotal
        dim = index.d
        if num_vectors > 0:
            # Original literal was split across lines by extraction garbling;
            # reconstructed as adjacent f-strings joined over "\n".
            return (
                f"📊 FAISS index contains {num_vectors} vectors.\n"
                f"✅ Embedding dimension: {dim}"
            )
        return "⚠️ No embeddings found in FAISS index!"
    except Exception as e:
        return f"❌ ERROR: Failed to load FAISS index - {e}"
# Report the state of the embedding index at startup, before the UI launches.
log("π Checking FAISS embeddings...")
faiss_status = check_faiss()  # human-readable summary or error string
log(faiss_status)
# ✅ Step 3: Initialize chatbot — remote inference via the Hugging Face
# Inference API (no local model weights are loaded).
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for *message*, given prior conversation turns.

    Args:
        message: the new user message.
        history: list of (user, assistant) pairs from previous turns.
        system_message: system prompt placed first in the message list.
        max_tokens / temperature / top_p: generation parameters forwarded to
            the inference client.

    Yields the accumulated response string after each streamed token so the
    Gradio UI can render output incrementally.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # BUG FIX: the huggingface_hub method is `chat_completion` (singular), not
    # `chat_completions`; streamed chunks are objects (attribute access), not
    # dicts; and the final chunk's delta.content can be None — coerce to "".
    # Also renamed the loop variable, which shadowed the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
# ✅ Step 4: Build the chatbot interface. The extra inputs are passed to
# respond() positionally after (message, history).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
# Original log literal was split across two lines (syntax error); reconstructed.
log("✅ All systems go! Launching chatbot...")

if __name__ == "__main__":
    demo.launch()