File size: 3,273 Bytes
b6a467a
 
54072ad
4e2c9cd
 
cd618c5
 
29d1f72
241003b
cd618c5
241003b
b6a467a
5606667
 
 
29d1f72
e925ddf
 
 
 
29d1f72
e925ddf
29d1f72
e925ddf
54072ad
e925ddf
 
 
 
54072ad
29d1f72
e925ddf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29d1f72
cd618c5
 
 
 
e63c0a3
cd618c5
b6a467a
cd618c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54072ad
b6a467a
cd618c5
b6a467a
 
 
 
 
 
 
 
 
 
 
54072ad
cd618c5
b6a467a
54072ad
b6a467a
 
 
cd618c5
b6a467a
 
 
 
 
 
cd618c5
b6a467a
 
 
cd618c5
b6a467a
 
cd618c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
import faiss
import numpy as np
import os
import time
import json 

# Make sure FAISS is available, installing faiss-cpu only when it is missing.
# NOTE(review): `import faiss` at the top of this file runs before this point,
# so a missing package fails there first; declaring faiss-cpu in the
# environment's requirements is the reliable fix -- confirm and drop this.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("faiss") is None:
    # Invoke pip through the running interpreter so the package is installed
    # into this environment, and pass an argv list instead of a shell string.
    # check=False keeps the original best-effort behaviour (no raise on failure).
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "faiss-cpu"],
        check=False,
    )

def log(message: object) -> None:
    """Print *message* to stdout behind a check-mark prefix.

    The prefix used to be stored as mojibake (the UTF-8 bytes of U+2705
    decoded with the wrong codec), so every log line started with garbage.
    It is written as an escape sequence here so the glyph cannot be mangled
    by a mis-decoded source file again.

    Args:
        message: Anything printable; it is interpolated into an f-string.
    """
    print(f"\u2705 {message}")


import os
import json
from datasets import load_dataset

DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)  # no-op when the directory already exists

# Short local name -> Hugging Face Hub dataset identifier.
# The variable is called `datasets`, but only `load_dataset` is imported from
# the `datasets` package, so no module is shadowed by this name.
datasets = {
    "sales": "goendalf666/sales-conversations",
    "blended": "blended_skill_talk",
    "dialog": "daily_dialog",
    "multiwoz": "multi_woz_v22",
}

# Export the "train" split of every dataset to DATA_DIR/<name>.json.
# Status messages use escape sequences for their emoji (U+1F4E5, U+2705);
# the previous literals were mojibake and printed as garbage.
for name, hf_name in datasets.items():
    print(f"\U0001F4E5 Downloading {name} dataset...")
    dataset = load_dataset(hf_name)

    # Only the training split is exported.
    train_data = dataset["train"]

    # Materialise each row as a plain dict so json can serialise the list.
    data_list = [dict(row) for row in train_data]

    file_path = os.path.join(DATA_DIR, f"{name}.json")
    # Explicit encoding so the output never depends on the platform default.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data_list, f, indent=2)

    print(f"\u2705 {name} dataset saved to {file_path}")



# Step 1: Run the embedding script by importing it.
# The log text's emoji (U+1F680) is written as an escape; the previous literal
# was mojibake.
log("\U0001F680 Running embeddings script...")
import embeddings  # imported for its side effects: this runs embeddings.py

# NOTE(review): the import above is synchronous, so this pause presumably only
# matters if embeddings.py hands work to a background thread/process -- confirm
# and remove the sleep if it does not.
time.sleep(5)  # Wait for embeddings to be created

# Step 2: Check FAISS index
def check_faiss(index_path: str = "my_embeddings") -> str:
    """Load a FAISS index from disk and describe it in a status string.

    Args:
        index_path: Location of the serialized index. Defaults to
            "my_embeddings", the path that used to be hard-coded here.

    Returns:
        A human-readable message: the vector count and embedding dimension
        when the index holds vectors, a warning when it is empty, or an
        error description when the index cannot be read. Failures are
        reported through the return value rather than raised.
    """
    try:
        index = faiss.read_index(index_path)
        num_vectors = index.ntotal
        dim = index.d
    except Exception as e:
        # Deliberately broad: this is a startup diagnostic, so any failure is
        # turned into a message instead of aborting the app.
        return f"❌ ERROR: Failed to load FAISS index - {e}"

    if num_vectors > 0:
        # Emoji are escapes (U+1F4CA, U+2705); the old literals were mojibake.
        return (
            f"\U0001F4CA FAISS index contains {num_vectors} vectors.\n"
            f"\u2705 Embedding dimension: {dim}"
        )
    return "⚠️ No embeddings found in FAISS index!"

# Report the state of the FAISS index once at startup. The result is kept in
# the module-level `faiss_status` as well as being logged.
# The emoji is written as an escape (U+1F50D, magnifying glass); the previous
# literal was mojibake.
log("\U0001F50D Checking FAISS embeddings...")
faiss_status = check_faiss()
log(faiss_status)

# Step 3: Initialize chatbot
# Shared inference client for the Mistral-7B-Instruct-v0.3 model id; respond()
# streams its chat completions through this object.
# NOTE(review): no token is passed here -- presumably credentials come from
# the environment; confirm for gated or rate-limited use.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat reply to *message*, yielding the text generated so far.

    Args:
        message: The latest user message.
        history: Earlier turns. Each entry is either a ``(user, assistant)``
            pair or a ``{"role": ..., "content": ...}`` dict; empty parts
            are skipped.
        system_message: Text sent as the leading "system" message.
        max_tokens: Forwarded to the client as ``max_tokens``.
        temperature: Forwarded to the client as ``temperature``.
        top_p: Forwarded to the client as ``top_p``.

    Yields:
        The accumulated response string after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if isinstance(turn, dict):
            # Role/content dict entries: pass through as-is.
            if turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        # Pair entries: (user_text, assistant_text).
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})
    response = ""

    # InferenceClient exposes `chat_completion` (singular); the previous
    # `chat_completions` attribute does not exist. The loop variable is named
    # `chunk` so it no longer shadows the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue  # nothing to append for a chunk without choices
        # A delta may carry no text (content is None); treat that as empty
        # instead of failing on `str += None`.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response

# Step 4: Build the chatbot interface.
# The extra controls are listed in the same order as respond()'s trailing
# parameters: system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are a friendly Chatbot.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Announce readiness whenever the module is loaded; the server itself is only
# started when the file is executed as a script.
# The emoji inside the message is an escape (U+2705); the previous literal was
# mojibake.
log("\u2705 All systems go! Launching chatbot...")
if __name__ == "__main__":
    demo.launch()