thechaiexperiment committed on
Commit
1dbb3fe
·
1 Parent(s): d32adc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -14
app.py CHANGED
@@ -25,6 +25,7 @@ from huggingface_hub import hf_hub_download
25
  from safetensors.torch import load_file
26
  from typing import List, Dict, Optional
27
  from safetensors.numpy import load_file
 
28
 
29
  # Initialize FastAPI app
30
  app = FastAPI()
@@ -112,7 +113,6 @@ def load_models():
112
  return False
113
 
114
  def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
115
- """Load embeddings from Safetensors file"""
116
  try:
117
  # Locate or download embeddings file
118
  embeddings_path = 'embeddings.safetensors'
@@ -124,25 +124,38 @@ def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
124
  repo_type="space"
125
  )
126
 
127
- # Load Safetensors file
128
- embeddings = load_file(embeddings_path)
129
- if not isinstance(embeddings, dict):
130
- raise ValueError("Expected a dictionary in the Safetensors file.")
131
-
132
- # Validate and convert tensors to numpy arrays
133
- result = {}
134
- for key, tensor in embeddings.items():
135
- if not hasattr(tensor, 'numpy'):
136
- raise TypeError(f"Value for key {key} is not a tensor or cannot be converted to numpy.")
137
- result[key] = tensor.numpy()
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- print("Embeddings successfully loaded.")
140
- return result
141
 
142
  except Exception as e:
143
  print(f"Error loading embeddings: {e}")
144
  return None
145
 
 
146
  def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
147
  """Load document data from HTML articles in a specified folder."""
148
  try:
 
25
  from safetensors.torch import load_file
26
  from typing import List, Dict, Optional
27
  from safetensors.numpy import load_file
28
+ from safetensors.torch import safe_open
29
 
30
  # Initialize FastAPI app
31
  app = FastAPI()
 
113
  return False
114
 
115
  def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
 
116
  try:
117
  # Locate or download embeddings file
118
  embeddings_path = 'embeddings.safetensors'
 
124
  repo_type="space"
125
  )
126
 
127
+ # Initialize a dictionary to store embeddings
128
+ embeddings = {}
129
+
130
+ # Open the safetensors file
131
+ with safe_open(embeddings_path, framework="pt") as f:
132
+ keys = f.keys()
133
+ print(f"Available keys in the .safetensors file: {list(keys)}") # Debugging info
134
+
135
+ # Iterate over the keys and load tensors
136
+ for key in keys:
137
+ try:
138
+ tensor = f.get_tensor(key)
139
+ if not isinstance(tensor, torch.Tensor):
140
+ raise TypeError(f"Value for key {key} is not a valid PyTorch tensor.")
141
+
142
+ # Convert tensor to NumPy array
143
+ embeddings[key] = tensor.numpy()
144
+ except Exception as key_error:
145
+ print(f"Failed to process key {key}: {key_error}")
146
+
147
+ if embeddings:
148
+ print("Embeddings successfully loaded.")
149
+ else:
150
+ print("No embeddings could be loaded. Please check the file format and content.")
151
 
152
+ return embeddings
 
153
 
154
  except Exception as e:
155
  print(f"Error loading embeddings: {e}")
156
  return None
157
 
158
+
159
  def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
160
  """Load document data from HTML articles in a specified folder."""
161
  try: