thechaiexperiment committed on
Commit
67c114d
·
1 Parent(s): 978a91d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -12
app.py CHANGED
@@ -87,9 +87,14 @@ import numpy as np
87
  import os
88
  from typing import Dict, Optional
89
 
 
 
 
 
 
90
  def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[str, np.ndarray]]:
91
  """
92
- Load embeddings from a pickle file containing a dictionary of numpy arrays.
93
 
94
  Args:
95
  embeddings_path (str): Path to the pickle file containing embeddings
@@ -103,7 +108,9 @@ def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[st
103
 
104
  try:
105
  with open(embeddings_path, 'rb') as f:
106
- embeddings = pickle.load(f)
 
 
107
 
108
  # Validate the loaded data
109
  if not isinstance(embeddings, dict):
@@ -111,18 +118,30 @@ def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[st
111
  return None
112
 
113
  # Convert values to numpy arrays if they aren't already
114
- for key in embeddings:
115
- if not isinstance(embeddings[key], np.ndarray):
116
- embeddings[key] = np.array(embeddings[key])
 
 
 
 
 
 
 
 
 
117
 
118
  # Print sample for verification
119
- sample_key = next(iter(embeddings))
120
- print(f"Data type: {type(embeddings)}")
121
- print(f"First few keys and values:")
122
- print(f"Key: {sample_key}, Value: {embeddings[sample_key][:20]}") # Show first 20 values
123
- print(f"Successfully loaded {len(embeddings)} embeddings")
124
-
125
- return embeddings
 
 
 
126
 
127
  except Exception as e:
128
  print(f"Error loading embeddings: {str(e)}")
 
87
  import os
88
  from typing import Dict, Optional
89
 
90
class EmbeddingsUnpickler(pickle.Unpickler):
    """Unpickler that tolerates pickle streams containing persistent IDs.

    The stock ``pickle.Unpickler.persistent_load`` raises
    ``pickle.UnpicklingError`` as soon as a persistent ID is encountered.
    This subclass overrides that hook so loading can continue.

    NOTE: the external object a persistent ID refers to is *not* resolved.
    The ID itself is inserted into the loaded structure in its place, so
    callers must be prepared to find raw IDs where real values were expected.

    SECURITY: like every unpickler, this executes instructions embedded in
    the pickle stream. Only use it on files that come from a trusted source.
    """

    def persistent_load(self, pid):
        """Return ``pid`` unchanged in place of the object it refers to.

        Args:
            pid: The persistent ID read from the pickle stream.

        Returns:
            The same ``pid`` object, untouched.
        """
        # Deliberately no lookup: there is no external store to resolve
        # against here, so the ID is passed through as a stand-in value.
        return pid
94
+
95
  def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[str, np.ndarray]]:
96
  """
97
+ Load embeddings from a pickle file with support for persistent IDs.
98
 
99
  Args:
100
  embeddings_path (str): Path to the pickle file containing embeddings
 
108
 
109
  try:
110
  with open(embeddings_path, 'rb') as f:
111
+ # Use custom unpickler with persistent_load support
112
+ unpickler = EmbeddingsUnpickler(f)
113
+ embeddings = unpickler.load()
114
 
115
  # Validate the loaded data
116
  if not isinstance(embeddings, dict):
 
118
  return None
119
 
120
  # Convert values to numpy arrays if they aren't already
121
+ processed_embeddings = {}
122
+ for key, value in embeddings.items():
123
+ # Handle both direct arrays and persistent IDs
124
+ if isinstance(value, (list, np.ndarray)):
125
+ processed_embeddings[key] = np.array(value)
126
+ else:
127
+ # If it's a persistent ID, convert it to a numpy array
128
+ try:
129
+ processed_embeddings[key] = np.array(value)
130
+ except Exception as e:
131
+ print(f"Warning: Could not convert embedding for {key}: {e}")
132
+ continue
133
 
134
  # Print sample for verification
135
+ if processed_embeddings:
136
+ sample_key = next(iter(processed_embeddings))
137
+ print(f"Data type: {type(processed_embeddings)}")
138
+ print(f"First few keys and values:")
139
+ print(f"Key: {sample_key}, Value: {processed_embeddings[sample_key][:20]}")
140
+ print(f"Successfully loaded {len(processed_embeddings)} embeddings")
141
+ return processed_embeddings
142
+ else:
143
+ print("Error: No valid embeddings were processed")
144
+ return None
145
 
146
  except Exception as e:
147
  print(f"Error loading embeddings: {str(e)}")