abrah926 committed on
Commit
690b43c
·
verified ·
1 Parent(s): 07d68d6

udp app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -30
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- from datasets import load_dataset
4
  import faiss
5
  import numpy as np
6
  import os
7
  import time
8
- import json
 
9
 
10
  # βœ… Ensure FAISS is installed
11
  os.system("pip install faiss-cpu")
@@ -13,11 +13,10 @@ os.system("pip install faiss-cpu")
13
  def log(message):
14
  print(f"βœ… {message}")
15
 
16
-
17
  DATA_DIR = "data"
18
  os.makedirs(DATA_DIR, exist_ok=True) # Ensure directory exists
19
 
20
- # βœ… List of datasets
21
  datasets = {
22
  "sales": "goendalf666/sales-conversations",
23
  "blended": "blended_skill_talk",
@@ -25,46 +24,44 @@ datasets = {
25
  "multiwoz": "multi_woz_v22",
26
  }
27
 
28
- # βœ… Save datasets to JSON
29
  for name, hf_name in datasets.items():
30
- print(f"πŸ“₯ Downloading {name} dataset...")
31
- dataset = load_dataset(hf_name)
32
 
33
- # Extract training data
34
- train_data = dataset["train"]
 
35
 
36
- # Convert dataset to list of dictionaries
 
 
37
  data_list = [dict(row) for row in train_data]
38
 
39
- # Save to JSON
40
- file_path = os.path.join(DATA_DIR, f"{name}.json")
41
  with open(file_path, "w") as f:
42
  json.dump(data_list, f, indent=2)
43
 
44
- print(f"βœ… {name} dataset saved to {file_path}")
45
-
46
-
47
 
48
- # βœ… Step 1: Run Embedding Script (Import and Run)
49
- log("πŸš€ Running embeddings script...")
50
- import embeddings # This will automatically run embeddings.py
 
 
51
 
52
- time.sleep(5) # Wait for embeddings to be created
 
53
 
54
- # βœ… Step 2: Check FAISS index
55
  def check_faiss():
56
- index_path = "my_embeddings" # Adjust if needed
 
 
 
57
 
58
  try:
59
  index = faiss.read_index(index_path)
60
  num_vectors = index.ntotal
61
  dim = index.d
62
-
63
- if num_vectors > 0:
64
- return f"πŸ“Š FAISS index contains {num_vectors} vectors.\nβœ… Embedding dimension: {dim}"
65
- else:
66
- return "⚠️ No embeddings found in FAISS index!"
67
-
68
  except Exception as e:
69
  return f"❌ ERROR: Failed to load FAISS index - {e}"
70
 
@@ -72,7 +69,7 @@ log("πŸ” Checking FAISS embeddings...")
72
  faiss_status = check_faiss()
73
  log(faiss_status)
74
 
75
- # βœ… Step 3: Initialize chatbot
76
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
77
 
78
  def respond(message, history, system_message, max_tokens, temperature, top_p):
@@ -94,7 +91,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
94
  response += token
95
  yield response
96
 
97
- # βœ… Step 4: Start Chatbot Interface
98
  demo = gr.ChatInterface(
99
  respond,
100
  additional_inputs=[
@@ -107,5 +104,5 @@ demo = gr.ChatInterface(
107
 
108
  log("βœ… All systems go! Launching chatbot...")
109
  if __name__ == "__main__":
110
- demo.launch()
111
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  import faiss
4
  import numpy as np
5
  import os
6
  import time
7
+ import json
8
+ import threading # βœ… Run embeddings in parallel
9
 
10
  # ✅ Ensure FAISS is installed
11
  os.system("pip install faiss-cpu")
 
def log(message):
    """Print ``message`` to stdout, prefixed with a check-mark emoji.

    Args:
        message: Value to report. It is interpolated into an f-string, so
            any object with a string form is accepted.
    """
    print(f"✅ {message}")
15
 
 
16
  DATA_DIR = "data"
17
  os.makedirs(DATA_DIR, exist_ok=True) # Ensure directory exists
18
 
19
+ # ✅ Step 1: Load Datasets from HF and Save Locally
20
  datasets = {
21
  "sales": "goendalf666/sales-conversations",
22
  "blended": "blended_skill_talk",
 
24
  "multiwoz": "multi_woz_v22",
25
  }
26
 
 
# Cache the "train" split of every configured dataset as <DATA_DIR>/<name>.json,
# skipping any dataset whose JSON file is already on disk.
for name, hf_name in datasets.items():
    file_path = os.path.join(DATA_DIR, f"{name}.json")

    if os.path.exists(file_path):
        log(f"✅ {name} dataset already stored at {file_path}")
        continue  # Skip if dataset exists

    # load_dataset is not imported at module level, so bring it into scope
    # here. The from-import form does not rebind the module-level `datasets`
    # dict that this loop iterates over.
    from datasets import load_dataset

    log(f"📥 Downloading {name} dataset...")
    dataset = load_dataset(hf_name)
    train_data = dataset["train"]
    data_list = [dict(row) for row in train_data]

    # Dump to a temporary sibling and rename it into place: an interrupted
    # write must not leave a truncated file that the existence check above
    # would later accept as a complete cache entry.
    tmp_path = f"{file_path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(data_list, f, indent=2)
    os.replace(tmp_path, file_path)

    log(f"✅ {name} dataset saved to {file_path}")
 
 
43
 
44
+ # ✅ Step 2: Run Embeddings in a Separate Thread
def run_embeddings():
    """Import the ``embeddings`` module, logging start, completion or failure.

    The import is performed for its module-level side effects (per the
    original comment, importing it runs embeddings.py).

    Raises:
        Exception: Whatever the import raises is logged and then re-raised
            unchanged.
    """
    log("🚀 Running embeddings script in background...")
    try:
        import embeddings  # noqa: F401 - imported only for its side effects
    except Exception as exc:
        # Without this, a failed import would never reach the log() output:
        # only the "finished" message would silently be missing.
        log(f"❌ Embeddings process failed: {exc}")
        raise
    log("✅ Embeddings process finished.")
49
 
50
+ embedding_thread = threading.Thread(target=run_embeddings)
51
+ embedding_thread.start() # βœ… Start embedding in background
52
 
53
+ # ✅ Step 3: Check FAISS index
def check_faiss(index_path="my_embeddings.faiss"):
    """Describe the state of the FAISS index stored at ``index_path``.

    Args:
        index_path: Path of the serialized index file. Defaults to
            "my_embeddings.faiss", the path that was previously hard-coded.

    Returns:
        A status string: a warning when the file does not exist, the vector
        count and embedding dimension when the index loads, or an error
        message when loading raises.
    """
    if not os.path.exists(index_path):
        return "⚠️ No FAISS index found! Embeddings might still be processing."

    try:
        index = faiss.read_index(index_path)
        num_vectors = index.ntotal
        dim = index.d
        return f"📊 FAISS index contains {num_vectors} vectors.\n✅ Embedding dimension: {dim}"
    except Exception as e:
        # Status check only: report the failure as text instead of raising.
        return f"❌ ERROR: Failed to load FAISS index - {e}"
67
 
 
69
  faiss_status = check_faiss()
70
  log(faiss_status)
71
 
72
+ # ✅ Step 4: Initialize Chatbot
73
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
74
 
75
  def respond(message, history, system_message, max_tokens, temperature, top_p):
 
91
  response += token
92
  yield response
93
 
94
+ # ✅ Step 5: Start Chatbot Interface
95
  demo = gr.ChatInterface(
96
  respond,
97
  additional_inputs=[
 
104
 
105
  log("βœ… All systems go! Launching chatbot...")
106
  if __name__ == "__main__":
107
+ demo.launch() # βœ… FIXED typo
108