VeganSquirrel committed on
Commit
873e3a7
·
verified ·
1 Parent(s): 96f49a8
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -10,26 +10,29 @@ import faiss
10
 
11
  # Step 1: Load Precomputed Embeddings and Metadata
12
  def load_embeddings(embeddings_folder='embeddings'):
13
- embeddings = []
14
  metadata = []
15
  for file in os.listdir(embeddings_folder):
16
  if file.endswith('.npy'):
17
  embedding_path = os.path.join(embeddings_folder, file)
18
- embedding = np.load(embedding_path)
19
- embeddings.append(embedding)
20
- # Extract metadata from the filename or set a default value
21
  meta_info = file.replace('.npy', '') # Example: 'course_1'
22
- metadata.append(meta_info)
23
- return np.array(embeddings), metadata
24
 
25
- embeddings, metadata = load_embeddings()
 
 
26
 
 
27
 
28
- # Step 2: Set Up FAISS Index
29
- dimension = embeddings.shape[1]
30
  index = faiss.IndexFlatL2(dimension)
31
  index.add(embeddings)
32
 
 
33
  # Step 3: Load the Language Model
34
  model_name = "HuggingFaceH4/zephyr-7b-alpha"
35
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
10
 
11
  # Step 1: Load Precomputed Embeddings and Metadata
12
  def load_embeddings(embeddings_folder='embeddings'):
13
+ all_embeddings = []
14
  metadata = []
15
  for file in os.listdir(embeddings_folder):
16
  if file.endswith('.npy'):
17
  embedding_path = os.path.join(embeddings_folder, file)
18
+ embedding = np.load(embedding_path) # Shape: (27, 384)
19
+ all_embeddings.append(embedding)
20
+ # Metadata corresponds to each .npy file
21
  meta_info = file.replace('.npy', '') # Example: 'course_1'
22
+ metadata.extend([meta_info] * embedding.shape[0]) # Repeat metadata for each sub-embedding
 
23
 
24
+ # Flatten list of embeddings to shape (n * 27, 384)
25
+ all_embeddings = np.vstack(all_embeddings)
26
+ return all_embeddings, metadata
27
 
28
+ embeddings, metadata = load_embeddings()
29
 
30
+ # Step 2: Set Up FAISS Index with Flattened Embeddings
31
+ dimension = embeddings.shape[1] # Should be 384 after flattening
32
  index = faiss.IndexFlatL2(dimension)
33
  index.add(embeddings)
34
 
35
+
36
  # Step 3: Load the Language Model
37
  model_name = "HuggingFaceH4/zephyr-7b-alpha"
38
  tokenizer = AutoTokenizer.from_pretrained(model_name)