Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -238,82 +238,11 @@ def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
|
|
238 |
data['df'] = pd.DataFrame()
|
239 |
return False
|
240 |
|
241 |
-
def load_recipes_data(folder_path='pdf kb.zip'):
|
242 |
-
try:
|
243 |
-
print("Loading documents data...")
|
244 |
-
temp_dir = None
|
245 |
-
|
246 |
-
# Handle .zip file
|
247 |
-
if folder_path.endswith('.zip'):
|
248 |
-
if not os.path.exists(folder_path):
|
249 |
-
print(f"Error: .zip file '{folder_path}' not found.")
|
250 |
-
return False
|
251 |
-
|
252 |
-
# Create a temporary directory for extracting the .zip
|
253 |
-
temp_dir = tempfile.TemporaryDirectory()
|
254 |
-
extract_path = temp_dir.name
|
255 |
-
|
256 |
-
# Extract the .zip file
|
257 |
-
try:
|
258 |
-
with zipfile.ZipFile(folder_path, 'r') as zip_ref:
|
259 |
-
zip_ref.extractall(extract_path)
|
260 |
-
print(f"Extracted .zip file to temporary folder: {extract_path}")
|
261 |
-
except Exception as e:
|
262 |
-
print(f"Error extracting .zip file: {e}")
|
263 |
-
return False
|
264 |
-
|
265 |
-
# Update the folder_path to the extracted directory
|
266 |
-
folder_path = extract_path
|
267 |
-
|
268 |
-
# Check if the folder exists
|
269 |
-
if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
|
270 |
-
print(f"Error: Folder '{folder_path}' not found.")
|
271 |
-
return False
|
272 |
-
|
273 |
-
# List all HTML or PDF files in the folder
|
274 |
-
html_files = [f for f in os.listdir(folder_path) if f.endswith('.html')]
|
275 |
-
pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
|
276 |
-
|
277 |
-
if not html_files and not pdf_files:
|
278 |
-
print(f"No HTML or PDF files found in folder '{folder_path}'.")
|
279 |
-
return False
|
280 |
-
|
281 |
-
documents = []
|
282 |
-
|
283 |
-
# Process PDF files (requires a PDF parser like PyPDF2)
|
284 |
-
for file_name in pdf_files:
|
285 |
-
file_path = os.path.join(folder_path, file_name)
|
286 |
-
try:
|
287 |
-
from PyPDF2 import PdfReader # Import here to avoid dependency issues
|
288 |
-
reader = PdfReader(file_path)
|
289 |
-
text = "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
|
290 |
-
documents.append({"file_name": file_name, "content": text})
|
291 |
-
except Exception as e:
|
292 |
-
print(f"Error reading PDF file {file_name}: {e}")
|
293 |
-
|
294 |
-
# Convert the list of documents to a DataFrame
|
295 |
-
data['df'] = pd.DataFrame(documents)
|
296 |
-
|
297 |
-
if data['df'].empty:
|
298 |
-
print("No valid documents loaded.")
|
299 |
-
return False
|
300 |
-
|
301 |
-
print(f"Successfully loaded {len(data['df'])} document records.")
|
302 |
-
return True
|
303 |
-
except Exception as e:
|
304 |
-
print(f"Error loading documents data: {e}")
|
305 |
-
data['df'] = pd.DataFrame()
|
306 |
-
return False
|
307 |
-
finally:
|
308 |
-
# Clean up the temporary directory, if created
|
309 |
-
if temp_dir:
|
310 |
-
temp_dir.cleanup()
|
311 |
|
312 |
def load_data():
|
313 |
"""Load all required data"""
|
314 |
embeddings_success = load_embeddings()
|
315 |
documents_success = load_documents_data()
|
316 |
-
recipes_success = load_recipes_data()
|
317 |
recipes_embeddings_success = load_recipes_embeddings()
|
318 |
if not recipes_embeddings_success:
|
319 |
print("Warning: Failed to load embeddings, falling back to basic functionality")
|
@@ -738,9 +667,12 @@ async def chat_endpoint(chat_query: ChatQuery):
|
|
738 |
@app.post("/api/resources")
|
739 |
async def resources_endpoint(profile: MedicalProfile):
|
740 |
try:
|
|
|
741 |
# Build the query text
|
742 |
query_text = profile.conditions + " " + profile.daily_symptoms
|
743 |
|
|
|
|
|
744 |
# Generate the query embedding
|
745 |
query_embedding = embed_query_text(query_text)
|
746 |
if query_embedding is None:
|
@@ -806,11 +738,13 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
806 |
try:
|
807 |
# Build the query text for recipes
|
808 |
recipe_query = (
|
809 |
-
f"Recipes and meals suitable for someone with: "
|
810 |
f"{profile.conditions} and experiencing {profile.daily_symptoms}"
|
811 |
)
|
812 |
query_text = recipe_query
|
813 |
|
|
|
|
|
814 |
# Generate the query embedding
|
815 |
query_embedding = embed_query_text(query_text)
|
816 |
if query_embedding is None:
|
@@ -818,7 +752,7 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
818 |
|
819 |
# Load embeddings and retrieve initial results
|
820 |
embeddings_data = load_recipes_embeddings()
|
821 |
-
folder_path = '
|
822 |
initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
|
823 |
if not initial_results:
|
824 |
raise ValueError("No relevant recipes found.")
|
@@ -841,7 +775,7 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
841 |
scored_documents.sort(key=lambda x: x[0], reverse=True) # Sort by score
|
842 |
|
843 |
# Load recipe metadata from DataFrame
|
844 |
-
file_path = '
|
845 |
df = pd.read_excel(file_path)
|
846 |
|
847 |
# Prepare the final recipes list
|
|
|
238 |
data['df'] = pd.DataFrame()
|
239 |
return False
|
240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
def load_data():
|
243 |
"""Load all required data"""
|
244 |
embeddings_success = load_embeddings()
|
245 |
documents_success = load_documents_data()
|
|
|
246 |
recipes_embeddings_success = load_recipes_embeddings()
|
247 |
if not recipes_embeddings_success:
|
248 |
print("Warning: Failed to load embeddings, falling back to basic functionality")
|
|
|
667 |
@app.post("/api/resources")
|
668 |
async def resources_endpoint(profile: MedicalProfile):
|
669 |
try:
|
670 |
+
|
671 |
# Build the query text
|
672 |
query_text = profile.conditions + " " + profile.daily_symptoms
|
673 |
|
674 |
+
print(f"Generated query text: {query_text}")
|
675 |
+
|
676 |
# Generate the query embedding
|
677 |
query_embedding = embed_query_text(query_text)
|
678 |
if query_embedding is None:
|
|
|
738 |
try:
|
739 |
# Build the query text for recipes
|
740 |
recipe_query = (
|
741 |
+
f"Recipes foods and meals suitable for someone with: "
|
742 |
f"{profile.conditions} and experiencing {profile.daily_symptoms}"
|
743 |
)
|
744 |
query_text = recipe_query
|
745 |
|
746 |
+
print(f"Generated query text: {query_text}")
|
747 |
+
|
748 |
# Generate the query embedding
|
749 |
query_embedding = embed_query_text(query_text)
|
750 |
if query_embedding is None:
|
|
|
752 |
|
753 |
# Load embeddings and retrieve initial results
|
754 |
embeddings_data = load_recipes_embeddings()
|
755 |
+
folder_path = 'downloaded_articles/downloaded_articles'
|
756 |
initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
|
757 |
if not initial_results:
|
758 |
raise ValueError("No relevant recipes found.")
|
|
|
775 |
scored_documents.sort(key=lambda x: x[0], reverse=True) # Sort by score
|
776 |
|
777 |
# Load recipe metadata from DataFrame
|
778 |
+
file_path = 'recipes_metadata.xlsx'
|
779 |
df = pd.read_excel(file_path)
|
780 |
|
781 |
# Prepare the final recipes list
|