thechaiexperiment committed on
Commit
7becdb7
·
1 Parent(s): cdeba07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -56
app.py CHANGED
@@ -82,69 +82,51 @@ def load_models():
82
  print(f"Error loading models: {e}")
83
  return False
84
 
85
- def load_embeddings():
86
- """Load embeddings with robust error handling for numpy arrays"""
87
- try:
88
- print("Loading embeddings...")
89
- embeddings_path = 'embeddings.pkl'
90
-
91
- if not os.path.exists(embeddings_path):
92
- print(f"Error: {embeddings_path} not found")
93
- return False
94
 
95
- def persistent_load(pid):
96
- return pid
97
-
98
- class CustomUnpickler(pickle.Unpickler):
99
- def persistent_load(self, pid):
100
- return pid
101
-
102
- def find_class(self, module, name):
103
- if module == "__main__":
104
- module = "numpy"
105
- return super().find_class(module, name)
106
 
107
- with open(embeddings_path, 'rb') as f:
108
- try:
109
- # Try loading with numpy first
110
- data['embeddings'] = np.load(f, allow_pickle=True).item()
111
- except Exception as e:
112
- print(f"Numpy loading failed, trying pickle: {e}")
113
- f.seek(0)
114
- try:
115
- # Try standard pickle
116
- data['embeddings'] = pickle.load(f)
117
- except Exception as e:
118
- print(f"Standard pickle failed, trying custom unpickler: {e}")
119
- f.seek(0)
120
- try:
121
- # Try custom unpickler with persistent load handler
122
- unpickler = CustomUnpickler(f)
123
- data['embeddings'] = unpickler.load()
124
- except Exception as e:
125
- print(f"Custom unpickler failed: {e}")
126
- data['embeddings'] = {}
127
- return False
128
 
129
- # Verify the loaded data
130
- if not isinstance(data['embeddings'], dict):
131
- print("Error: Embeddings data is not in expected format")
132
- print(f"Actual type: {type(data['embeddings'])}")
133
- data['embeddings'] = {}
134
- return False
135
 
136
- # Verify the structure of the embeddings
137
- sample_key = next(iter(data['embeddings']))
138
- sample_value = data['embeddings'][sample_key]
139
- print(f"Sample embedding structure - Key: {sample_key}, Value type: {type(sample_value)}, Shape: {np.array(sample_value).shape}")
 
 
 
 
 
140
 
141
- print(f"Successfully loaded {len(data['embeddings'])} embeddings")
142
- return True
 
 
 
 
 
 
143
 
144
  except Exception as e:
145
- print(f"Error loading embeddings: {e}")
146
- data['embeddings'] = {}
147
- return False
148
 
149
  def load_documents_data():
150
  """Load document data with error handling"""
 
82
  print(f"Error loading models: {e}")
83
  return False
84
 
85
+ import pickle
86
+ import numpy as np
87
+ import os
88
+ from typing import Dict, Optional
 
 
 
 
 
89
 
90
+ def load_embeddings(embeddings_path: str = 'embeddings.pkl') -> Optional[Dict[str, np.ndarray]]:
91
+ """
92
+ Load embeddings from a pickle file containing a dictionary of numpy arrays.
93
+
94
+ Args:
95
+ embeddings_path (str): Path to the pickle file containing embeddings
 
 
 
 
 
96
 
97
+ Returns:
98
+ Optional[Dict[str, np.ndarray]]: Dictionary of embeddings or None if loading fails
99
+ """
100
+ if not os.path.exists(embeddings_path):
101
+ print(f"Error: {embeddings_path} not found")
102
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ try:
105
+ with open(embeddings_path, 'rb') as f:
106
+ embeddings = pickle.load(f)
 
 
 
107
 
108
+ # Validate the loaded data
109
+ if not isinstance(embeddings, dict):
110
+ print(f"Error: Expected dict, got {type(embeddings)}")
111
+ return None
112
+
113
+ # Convert values to numpy arrays if they aren't already
114
+ for key in embeddings:
115
+ if not isinstance(embeddings[key], np.ndarray):
116
+ embeddings[key] = np.array(embeddings[key])
117
 
118
+ # Print sample for verification
119
+ sample_key = next(iter(embeddings))
120
+ print(f"Data type: {type(embeddings)}")
121
+ print(f"First few keys and values:")
122
+ print(f"Key: {sample_key}, Value: {embeddings[sample_key][:20]}") # Show first 20 values
123
+ print(f"Successfully loaded {len(embeddings)} embeddings")
124
+
125
+ return embeddings
126
 
127
  except Exception as e:
128
+ print(f"Error loading embeddings: {str(e)}")
129
+ return None
 
130
 
131
  def load_documents_data():
132
  """Load document data with error handling"""