Spaces:
Sleeping
Sleeping
Commit
·
67c114d
1
Parent(s):
978a91d
Update app.py
Browse files
app.py
CHANGED
@@ -87,9 +87,14 @@ import numpy as np
|
|
87 |
import os
|
88 |
from typing import Dict, Optional
|
89 |
|
|
|
|
|
|
|
|
|
|
|
90 |
def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[str, np.ndarray]]:
|
91 |
"""
|
92 |
-
Load embeddings from a pickle file
|
93 |
|
94 |
Args:
|
95 |
embeddings_path (str): Path to the pickle file containing embeddings
|
@@ -103,7 +108,9 @@ def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[st
|
|
103 |
|
104 |
try:
|
105 |
with open(embeddings_path, 'rb') as f:
|
106 |
-
|
|
|
|
|
107 |
|
108 |
# Validate the loaded data
|
109 |
if not isinstance(embeddings, dict):
|
@@ -111,18 +118,30 @@ def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[st
|
|
111 |
return None
|
112 |
|
113 |
# Convert values to numpy arrays if they aren't already
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
# Print sample for verification
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
126 |
|
127 |
except Exception as e:
|
128 |
print(f"Error loading embeddings: {str(e)}")
|
|
|
87 |
import os
|
88 |
from typing import Dict, Optional
|
89 |
|
90 |
+
class EmbeddingsUnpickler(pickle.Unpickler):
    """Unpickler that tolerates pickle streams containing persistent IDs.

    The stock ``pickle.Unpickler`` raises ``pickle.UnpicklingError`` as soon
    as it meets a persistent-ID opcode, because it has no way to resolve the
    external object the ID refers to. This subclass overrides
    ``persistent_load`` so that such streams can still be read.

    SECURITY NOTE(review): unpickling can execute arbitrary code embedded in
    the stream. Only use this class on files from a trusted source.
    """

    def persistent_load(self, pid):
        """Return the persistent ID itself in place of the referenced object.

        Args:
            pid: The persistent ID read from the pickle stream. Its type and
                layout are decided by whatever pickler wrote the file.

        Returns:
            ``pid`` unchanged. The unpickler inserts this value wherever the
            externally stored object would have appeared, so the loaded
            structure holds raw IDs rather than the real objects.
        """
        # NOTE(review): this does not resolve the reference; it only prevents
        # the UnpicklingError. Presumably the file was written by a pickler
        # that externalizes array/tensor storage -- confirm that callers can
        # actually work with the raw IDs they receive.
        return pid
|
94 |
+
|
95 |
def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[str, np.ndarray]]:
|
96 |
"""
|
97 |
+
Load embeddings from a pickle file with support for persistent IDs.
|
98 |
|
99 |
Args:
|
100 |
embeddings_path (str): Path to the pickle file containing embeddings
|
|
|
108 |
|
109 |
try:
|
110 |
with open(embeddings_path, 'rb') as f:
|
111 |
+
# Use custom unpickler with persistent_load support
|
112 |
+
unpickler = EmbeddingsUnpickler(f)
|
113 |
+
embeddings = unpickler.load()
|
114 |
|
115 |
# Validate the loaded data
|
116 |
if not isinstance(embeddings, dict):
|
|
|
118 |
return None
|
119 |
|
120 |
# Convert values to numpy arrays if they aren't already
|
121 |
+
processed_embeddings = {}
|
122 |
+
for key, value in embeddings.items():
|
123 |
+
# Handle both direct arrays and persistent IDs
|
124 |
+
if isinstance(value, (list, np.ndarray)):
|
125 |
+
processed_embeddings[key] = np.array(value)
|
126 |
+
else:
|
127 |
+
# If it's a persistent ID, convert it to a numpy array
|
128 |
+
try:
|
129 |
+
processed_embeddings[key] = np.array(value)
|
130 |
+
except Exception as e:
|
131 |
+
print(f"Warning: Could not convert embedding for {key}: {e}")
|
132 |
+
continue
|
133 |
|
134 |
# Print sample for verification
|
135 |
+
if processed_embeddings:
|
136 |
+
sample_key = next(iter(processed_embeddings))
|
137 |
+
print(f"Data type: {type(processed_embeddings)}")
|
138 |
+
print(f"First few keys and values:")
|
139 |
+
print(f"Key: {sample_key}, Value: {processed_embeddings[sample_key][:20]}")
|
140 |
+
print(f"Successfully loaded {len(processed_embeddings)} embeddings")
|
141 |
+
return processed_embeddings
|
142 |
+
else:
|
143 |
+
print("Error: No valid embeddings were processed")
|
144 |
+
return None
|
145 |
|
146 |
except Exception as e:
|
147 |
print(f"Error loading embeddings: {str(e)}")
|