update
Browse files
app.py
CHANGED
@@ -10,26 +10,29 @@ import faiss
|
|
10 |
|
11 |
# Step 1: Load Precomputed Embeddings and Metadata
|
12 |
def load_embeddings(embeddings_folder='embeddings'):
|
13 |
-
|
14 |
metadata = []
|
15 |
for file in os.listdir(embeddings_folder):
|
16 |
if file.endswith('.npy'):
|
17 |
embedding_path = os.path.join(embeddings_folder, file)
|
18 |
-
embedding = np.load(embedding_path)
|
19 |
-
|
20 |
-
#
|
21 |
meta_info = file.replace('.npy', '') # Example: 'course_1'
|
22 |
-
metadata.
|
23 |
-
return np.array(embeddings), metadata
|
24 |
|
25 |
-
embeddings
|
|
|
|
|
26 |
|
|
|
27 |
|
28 |
-
# Step 2: Set Up FAISS Index
|
29 |
-
dimension = embeddings.shape[1]
|
30 |
index = faiss.IndexFlatL2(dimension)
|
31 |
index.add(embeddings)
|
32 |
|
|
|
33 |
# Step 3: Load the Language Model
|
34 |
model_name = "HuggingFaceH4/zephyr-7b-alpha"
|
35 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
10 |
|
11 |
# Step 1: Load Precomputed Embeddings and Metadata
|
12 |
def load_embeddings(embeddings_folder='embeddings'):
    """Load every ``.npy`` file in a folder into one stacked embedding matrix.

    Each file contributes one row per sub-embedding (the original code notes
    a per-file shape of ``(27, 384)``). A file that stores a single 1-D
    vector is treated as one row. A parallel metadata list records, for
    every row, the name of the file it came from with the ``.npy`` suffix
    removed (for example ``'course_1'``).

    Args:
        embeddings_folder: Directory scanned (non-recursively) for files
            whose name ends in ``.npy``.

    Returns:
        A tuple ``(all_embeddings, metadata)``: the vertically stacked
        array of all rows, and a list with one source name per row, so
        ``metadata[i]`` describes ``all_embeddings[i]``.

    Raises:
        ValueError: If the folder contains no ``.npy`` files, or if the
            files cannot be stacked because their row widths differ
            (the latter is raised by ``np.vstack``).
    """
    suffix = '.npy'
    all_embeddings = []
    metadata = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # row order (and therefore any index built on it) reproducible.
    for file in sorted(os.listdir(embeddings_folder)):
        if not file.endswith(suffix):
            continue
        embedding_path = os.path.join(embeddings_folder, file)
        # Promote a lone 1-D vector to a single row so the number of
        # metadata entries added below equals the rows np.vstack produces.
        embedding = np.atleast_2d(np.load(embedding_path))
        all_embeddings.append(embedding)
        # Strip only the trailing suffix; str.replace would also remove
        # any '.npy' occurring in the middle of the file name.
        meta_info = file[:-len(suffix)]
        # Repeat the file-level metadata once per sub-embedding row.
        metadata.extend([meta_info] * embedding.shape[0])

    if not all_embeddings:
        raise ValueError(
            f"No .npy embedding files found in {embeddings_folder!r}"
        )

    # Flatten the per-file arrays into one (total_rows, dim) matrix.
    return np.vstack(all_embeddings), metadata
|
27 |
|
28 |
+
# Load all precomputed embeddings once at import time; metadata[i] names the
# source file of embeddings[i].
embeddings, metadata = load_embeddings()

# Step 2: Set Up FAISS Index with Flattened Embeddings
# FAISS only accepts C-contiguous float32 matrices. Normalising here keeps
# index.add() working even if the .npy files were saved as float64; this is
# a no-op (no copy) when the array already has that layout and dtype.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
dimension = embeddings.shape[1]  # Should be 384 after flattening
index = faiss.IndexFlatL2(dimension)
# Vectors are added in row order, so index positions line up with `metadata`.
index.add(embeddings)

# Step 3: Load the Language Model
model_name = "HuggingFaceH4/zephyr-7b-alpha"
tokenizer = AutoTokenizer.from_pretrained(model_name)
|