tstone87 committed on
Commit
cfec7bd
·
verified ·
1 Parent(s): d5b8fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -6
app.py CHANGED
@@ -1,22 +1,141 @@
1
  import os
2
  import shutil
 
 
3
  import gradio as gr
 
 
4
 
5
- # Function to prepare FAISS files for download
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def prepare_faiss_files(dest_dir="/mnt/data"):
    """Copy the FAISS artifacts into a download folder.

    Looks for ``policy_embeddings.npy`` and ``faiss_index.bin`` in the
    current working directory and copies both into *dest_dir*.

    Args:
        dest_dir: Folder that receives the copies. It is created (including
            parents) when it does not exist yet. Defaults to ``/mnt/data``.

    Returns:
        A status message: a success message when both files were copied, or
        a "not found" message when either source file is missing.
    """
    source_files = ("policy_embeddings.npy", "faiss_index.bin")

    # Guard clause: both artifacts must exist before anything is copied.
    if not all(os.path.exists(name) for name in source_files):
        return "❌ FAISS files not found. Try running the chatbot first to generate them."

    # shutil.copy does not create missing directories, so make sure the
    # destination exists first.
    os.makedirs(dest_dir, exist_ok=True)
    for name in source_files:
        shutil.copy(name, os.path.join(dest_dir, name))

    return "✅ FAISS files are ready for download. Go to the 'Files' tab in Hugging Face Space and download them."
13
 
14
# Gradio UI with a single button that triggers FAISS file preparation.
with gr.Blocks() as download_ui:
    gr.Markdown("### 🔽 Download FAISS Files")
    download_button = gr.Button("Prepare FAISS Files for Download")
    output_text = gr.Textbox()
    # The handler takes no inputs; its returned status message is shown in
    # the textbox.
    download_button.click(fn=prepare_faiss_files, outputs=output_text)

# Launch the download interface.
download_ui.launch()
 
 
 
1
  import os
2
  import shutil
3
+ import faiss
4
+ import numpy as np
5
  import gradio as gr
6
+ from sentence_transformers import SentenceTransformer
7
+ from huggingface_hub import HfApi, hf_hub_download, login
8
 
9
# Hugging Face repository details
HF_REPO_ID = "tstone87/repo"  # Repo that stores the FAISS artifacts
HF_TOKEN = os.getenv("HF_TOKEN")  # Read from the environment, never hard-coded

# Fail fast: the Hub helpers in this file all pass this token.
if not HF_TOKEN:
    raise ValueError(
        "❌ ERROR: Hugging Face token not found. "
        "Add it as a secret in the Hugging Face Space settings."
    )

# Authenticate with Hugging Face
login(token=HF_TOKEN)

# File paths (relative to the current working directory)
EMBEDDINGS_FILE = "policy_embeddings.npy"
INDEX_FILE = "faiss_index.bin"
TEXT_FILE = "combined_text_documents.txt"

# Load the policy text; fall back to an empty string when the file is missing
# so the rest of the module can still be imported.
if os.path.exists(TEXT_FILE):
    with open(TEXT_FILE, "r", encoding="utf-8") as f:
        POLICY_TEXT = f.read()
    print("✅ Loaded policy text from combined_text_documents.txt")
else:
    print("❌ ERROR: combined_text_documents.txt not found! Ensure it's uploaded.")
    POLICY_TEXT = ""

# Sentence embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Split the policy text into fixed-size character chunks for FAISS indexing.
# An empty text yields an empty list because the range is empty.
chunk_size = 500
chunks = [POLICY_TEXT[i:i + chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)]
39
+
40
# Upload the FAISS files to the Hugging Face Hub
def upload_faiss_to_hf():
    """Upload the local embeddings and index files to the Hub.

    ``EMBEDDINGS_FILE`` and ``INDEX_FILE`` are each uploaded to
    ``HF_REPO_ID`` (``repo_type="dataset"``) under their own file name.
    A file that does not exist locally is skipped.

    Returns:
        None. Progress is reported with ``print``; the success message is
        only printed when at least one file was actually uploaded.
    """
    api = HfApi()
    uploaded = []

    for label, path in (("embeddings", EMBEDDINGS_FILE), ("index", INDEX_FILE)):
        if not os.path.exists(path):
            continue
        print(f"📤 Uploading FAISS {label} to Hugging Face...")
        api.upload_file(
            path_or_fileobj=path,
            path_in_repo=path,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        uploaded.append(path)

    if uploaded:
        print("✅ FAISS files successfully uploaded to Hugging Face.")
    else:
        # Previously "success" was printed even when nothing was uploaded.
        print("❌ No FAISS files found locally; nothing was uploaded.")
65
+
66
# Download the FAISS files from the Hugging Face Hub if they are missing
def download_faiss_from_hf():
    """Fetch missing FAISS artifacts from the Hub into the working directory.

    For each of ``EMBEDDINGS_FILE`` and ``INDEX_FILE`` that is not present
    locally, the file is downloaded from ``HF_REPO_ID``. The request uses
    ``repo_type="dataset"`` to match the repo type used by the upload
    function in this file.

    A Hub HTTP error (for example, the file or repo does not exist yet) is
    reported and skipped rather than raised, so the caller can check which
    files exist afterwards and rebuild them if needed.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Local import: the file's top-level import only pulls in HfApi,
    # hf_hub_download and login.
    from huggingface_hub.utils import HfHubHTTPError

    downloaded = []
    for label, filename in (("embeddings", EMBEDDINGS_FILE), ("index", INDEX_FILE)):
        if os.path.exists(filename):
            continue
        print(f"📥 Downloading FAISS {label} from Hugging Face...")
        try:
            hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=filename,
                repo_type="dataset",
                local_dir=".",
                token=HF_TOKEN,
            )
        except HfHubHTTPError as err:
            print(f"❌ Could not download {filename} from Hugging Face: {err}")
        else:
            downloaded.append(filename)

    if downloaded:
        print("✅ FAISS files downloaded from Hugging Face.")
77
+
78
# Make the FAISS artifacts available: use the local files when present,
# otherwise try the Hub, otherwise rebuild them from the policy text.
if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
    print("✅ FAISS files found locally. Loading from disk...")
else:
    print("🚀 FAISS files not found! Downloading from Hugging Face...")
    download_faiss_from_hf()

if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
    embeddings = np.load(EMBEDDINGS_FILE)
    index = faiss.read_index(INDEX_FILE)
else:
    print("🚀 No FAISS files found. Recomputing...")
    if chunks:
        # Encode all chunks in one batched call instead of one call per chunk.
        embeddings = np.asarray(model.encode(chunks), dtype="float32")

        # Save embeddings for future use
        np.save(EMBEDDINGS_FILE, embeddings)

        # Build an IVF index. Training needs at least as many vectors as
        # clusters, so cap the cluster count at the number of chunks.
        d = embeddings.shape[1]
        nlist = min(10, len(chunks))  # Number of clusters
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFFlat(quantizer, d, nlist)
        index.train(embeddings)
        index.add(embeddings)
        index.nprobe = min(2, nlist)  # Clusters probed per query

        # Save the FAISS index and publish both artifacts to the Hub
        faiss.write_index(index, INDEX_FILE)
        upload_faiss_to_hf()
        print("✅ FAISS index created and saved.")
    else:
        print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
        index = None
113
+
114
# Search the FAISS index
def search_policy(query, top_k=3):
    """Return the policy chunks nearest to *query*.

    Args:
        query: Text to embed with the module-level ``model``.
        top_k: Number of neighbours requested from the index.

    Returns:
        The matching chunks joined by blank lines, or an error message when
        the module-level ``index`` is ``None``.
    """
    if index is None:
        return "Error: FAISS index is not available."

    query_embedding = model.encode(query).reshape(1, -1)
    _, indices = index.search(query_embedding, top_k)

    # FAISS fills unused result slots with -1. A bare `i < len(chunks)` check
    # lets -1 through, and chunks[-1] would silently return the last chunk.
    return "\n\n".join(chunks[i] for i in indices[0] if 0 <= i < len(chunks))
123
+
124
# Gradio handler that copies the FAISS files to a download folder
def prepare_faiss_files(dest_dir="/mnt/data"):
    """Copy the FAISS artifacts into a download folder.

    Copies ``EMBEDDINGS_FILE`` and ``INDEX_FILE`` into *dest_dir* as
    ``policy_embeddings.npy`` and ``faiss_index.bin``.

    Args:
        dest_dir: Folder that receives the copies. It is created (including
            parents) when it does not exist yet. Defaults to ``/mnt/data``.

    Returns:
        A status message: a success message when both files were copied, or
        a "not found" message when either source file is missing.
    """
    # Guard clause: both artifacts must exist before anything is copied.
    if not (os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE)):
        return "❌ FAISS files not found. Try running the chatbot first to generate them."

    # shutil.copy does not create missing directories, so make sure the
    # destination exists first.
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(EMBEDDINGS_FILE, os.path.join(dest_dir, "policy_embeddings.npy"))
    shutil.copy(INDEX_FILE, os.path.join(dest_dir, "faiss_index.bin"))

    return "✅ FAISS files are ready for download. Go to the 'Files' tab in Hugging Face Space and download them."
132
 
 
# Gradio UI with a single button that triggers FAISS file preparation.
with gr.Blocks() as download_ui:
    gr.Markdown("### 🔽 Download FAISS Files")
    download_button = gr.Button("Prepare FAISS Files for Download")
    output_text = gr.Textbox()
    # The handler takes no inputs; its returned status message is shown in
    # the textbox.
    download_button.click(fn=prepare_faiss_files, outputs=output_text)

# Report the index status before launching. By default launch() keeps the
# main thread busy while the server runs, so a print placed after it would
# not show up during normal operation. Only claim success when an index
# actually exists.
if index is not None:
    print("✅ FAISS index successfully loaded.")

download_ui.launch()