Spaces:
Sleeping
Sleeping
Commit
·
1dbb3fe
1
Parent(s):
d32adc2
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,6 +25,7 @@ from huggingface_hub import hf_hub_download
|
|
| 25 |
from safetensors.torch import load_file
|
| 26 |
from typing import List, Dict, Optional
|
| 27 |
from safetensors.numpy import load_file
|
|
|
|
| 28 |
|
| 29 |
# Initialize FastAPI app
|
| 30 |
app = FastAPI()
|
|
@@ -112,7 +113,6 @@ def load_models():
|
|
| 112 |
return False
|
| 113 |
|
| 114 |
def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
|
| 115 |
-
"""Load embeddings from Safetensors file"""
|
| 116 |
try:
|
| 117 |
# Locate or download embeddings file
|
| 118 |
embeddings_path = 'embeddings.safetensors'
|
|
@@ -124,25 +124,38 @@ def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
|
|
| 124 |
repo_type="space"
|
| 125 |
)
|
| 126 |
|
| 127 |
-
#
|
| 128 |
-
embeddings =
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
return result
|
| 141 |
|
| 142 |
except Exception as e:
|
| 143 |
print(f"Error loading embeddings: {e}")
|
| 144 |
return None
|
| 145 |
|
|
|
|
| 146 |
def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
|
| 147 |
"""Load document data from HTML articles in a specified folder."""
|
| 148 |
try:
|
|
|
|
| 25 |
from safetensors.torch import load_file
|
| 26 |
from typing import List, Dict, Optional
|
| 27 |
from safetensors.numpy import load_file
|
| 28 |
+
from safetensors.torch import safe_open
|
| 29 |
|
| 30 |
# Initialize FastAPI app
|
| 31 |
app = FastAPI()
|
|
|
|
| 113 |
return False
|
| 114 |
|
| 115 |
def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
|
|
|
|
| 116 |
try:
|
| 117 |
# Locate or download embeddings file
|
| 118 |
embeddings_path = 'embeddings.safetensors'
|
|
|
|
| 124 |
repo_type="space"
|
| 125 |
)
|
| 126 |
|
| 127 |
+
# Initialize a dictionary to store embeddings
|
| 128 |
+
embeddings = {}
|
| 129 |
+
|
| 130 |
+
# Open the safetensors file
|
| 131 |
+
with safe_open(embeddings_path, framework="pt") as f:
|
| 132 |
+
keys = f.keys()
|
| 133 |
+
print(f"Available keys in the .safetensors file: {list(keys)}") # Debugging info
|
| 134 |
+
|
| 135 |
+
# Iterate over the keys and load tensors
|
| 136 |
+
for key in keys:
|
| 137 |
+
try:
|
| 138 |
+
tensor = f.get_tensor(key)
|
| 139 |
+
if not isinstance(tensor, torch.Tensor):
|
| 140 |
+
raise TypeError(f"Value for key {key} is not a valid PyTorch tensor.")
|
| 141 |
+
|
| 142 |
+
# Convert tensor to NumPy array
|
| 143 |
+
embeddings[key] = tensor.numpy()
|
| 144 |
+
except Exception as key_error:
|
| 145 |
+
print(f"Failed to process key {key}: {key_error}")
|
| 146 |
+
|
| 147 |
+
if embeddings:
|
| 148 |
+
print("Embeddings successfully loaded.")
|
| 149 |
+
else:
|
| 150 |
+
print("No embeddings could be loaded. Please check the file format and content.")
|
| 151 |
|
| 152 |
+
return embeddings
|
|
|
|
| 153 |
|
| 154 |
except Exception as e:
|
| 155 |
print(f"Error loading embeddings: {e}")
|
| 156 |
return None
|
| 157 |
|
| 158 |
+
|
| 159 |
def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
|
| 160 |
"""Load document data from HTML articles in a specified folder."""
|
| 161 |
try:
|