Update app.py
app.py
CHANGED
@@ -160,7 +160,6 @@ def load_embeddings() -> Optional[Dict[str, np.ndarray]]:
 
 def load_recipes_embeddings() -> Optional[Dict[str, np.ndarray]]:
     try:
-        # Locate or download the embeddings file
        embeddings_path = 'recipes_embeddings.safetensors'
        if not os.path.exists(embeddings_path):
            print("File not found locally. Attempting to download from Hugging Face Hub...")
@@ -169,21 +168,15 @@ def load_recipes_embeddings() -> Optional[Dict[str, np.ndarray]]:
                filename="embeddings.safetensors",
                repo_type="space"
            )
-
+
+        # Using safe_open from safetensors to load embeddings
        embeddings = {}
-
+        from safetensors.numpy import safe_open
        with safe_open(embeddings_path, framework="pt") as f:
            keys = list(f.keys())
-            #print(f"Available keys in the .safetensors file: {keys}")  # Debugging info
-
-            # Iterate over the keys and load tensors
            for key in keys:
                try:
-                    tensor = f.get_tensor(key)
-                    if tensor.shape[0] != 384:  # Optional: Validate tensor shape
-                        print(f"Warning: Tensor for key {key} has unexpected shape {tensor.shape}")
-
-                    # Convert tensor to NumPy array
+                    tensor = f.get_tensor(key)
                    embeddings[key] = tensor.numpy()
                except Exception as key_error:
                    print(f"Failed to process key {key}: {key_error}")
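For reference, a minimal sketch of the loading pattern this hunk converges on: download the .safetensors file from the Space when it is missing locally, then read every tensor through safe_open. This is a sketch, not the Space's actual code: the repo_id is a placeholder, it assumes torch is installed (framework="pt" returns torch tensors, which .numpy() converts), and it imports safe_open from the top-level safetensors package.

import os
from typing import Dict, Optional

import numpy as np
from huggingface_hub import hf_hub_download
from safetensors import safe_open

def load_recipes_embeddings_sketch() -> Optional[Dict[str, np.ndarray]]:
    try:
        embeddings_path = 'recipes_embeddings.safetensors'
        if not os.path.exists(embeddings_path):
            # Placeholder repo_id; the Space would use its own repo id here.
            embeddings_path = hf_hub_download(
                repo_id="user/space-name",
                filename="embeddings.safetensors",
                repo_type="space",
            )
        embeddings: Dict[str, np.ndarray] = {}
        # framework="pt" yields torch tensors; .numpy() converts each one,
        # mirroring the behaviour of the code in the hunk above.
        with safe_open(embeddings_path, framework="pt") as f:
            for key in f.keys():
                try:
                    embeddings[key] = f.get_tensor(key).numpy()
                except Exception as key_error:
                    print(f"Failed to process key {key}: {key_error}")
        return embeddings
    except Exception as e:
        print(f"Error loading embeddings: {e}")
        return None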
@@ -199,57 +192,6 @@ def load_recipes_embeddings() -> Optional[Dict[str, np.ndarray]]:
        print(f"Error loading embeddings: {e}")
        return None
 
-def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
-    """Load document data from HTML articles in a specified folder."""
-    try:
-        print("Loading documents data...")
-        # Check if the folder exists
-        if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
-            print(f"Error: Folder '{folder_path}' not found")
-            return False
-        # List all HTML files in the folder
-        html_files = [f for f in os.listdir(folder_path) if f.endswith('.html')]
-        if not html_files:
-            print(f"No HTML files found in folder '{folder_path}'")
-            return False
-        documents = []
-        # Iterate through each HTML file and parse the content
-        for file_name in html_files:
-            file_path = os.path.join(folder_path, file_name)
-            try:
-                with open(file_path, 'r', encoding='utf-8') as file:
-                    # Parse the HTML file
-                    soup = BeautifulSoup(file, 'html.parser')
-                    # Extract text content (or customize this as per your needs)
-                    text = soup.get_text(separator='\n').strip()
-                    documents.append({"file_name": file_name, "content": text})
-            except Exception as e:
-                print(f"Error reading file {file_name}: {e}")
-        # Convert the list of documents to a DataFrame
-        data['df'] = pd.DataFrame(documents)
-
-        if data['df'].empty:
-            print("No valid documents loaded.")
-            return False
-        print(f"Successfully loaded {len(data['df'])} document records.")
-        return True
-    except Exception as e:
-        print(f"Error loading documents data: {e}")
-        data['df'] = pd.DataFrame()
-        return False
-
-
-def load_data():
-    """Load all required data"""
-    embeddings_success = load_embeddings()
-    documents_success = load_documents_data()
-    recipes_embeddings_success = load_recipes_embeddings()
-    if not recipes_embeddings_success:
-        print("Warning: Failed to load embeddings, falling back to basic functionality")
-    return True
-
-# Initialize application
-print("Initializing application...")
 init_success = load_models() and load_data()
 
 def translate_text(text, source_to_target='ar_to_en'):
@@ -741,7 +683,7 @@ async def recipes_endpoint(profile: MedicalProfile):
 
    # Load recipe metadata from DataFrame
    file_path = 'recipes_metadata.xlsx'
-
+    metadata_df = pd.read_excel(file_path)
 
    # Prepare the final recipes list
    recipes = []
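The added metadata_df = pd.read_excel(file_path) reads the recipe metadata inside the endpoint; pandas needs an Excel engine such as openpyxl available for .xlsx files. A small hedged sketch of a more defensive version of that read (the fallback behaviour is an assumption, not taken from the Space):

import pandas as pd

file_path = 'recipes_metadata.xlsx'
try:
    # openpyxl must be installed for pandas to read .xlsx files
    metadata_df = pd.read_excel(file_path, engine='openpyxl')
except Exception as e:
    print(f"Error reading recipe metadata: {e}")
    metadata_df = pd.DataFrame()  # empty frame so the endpoint can still respond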