thechaiexperiment committed
Commit 4d9cbac · 1 Parent(s): 26493ce

Update app.py

Files changed (1):
  1. app.py (+35 -41)
app.py CHANGED
@@ -6,6 +6,7 @@ import torchvision
 import nltk
 import torch
 import pandas as pd
+import requests
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
@@ -49,11 +50,8 @@ class QueryRequest(BaseModel):
     language_code: int = 1
 
 class MedicalProfile(BaseModel):
-    chronic_conditions: List[str]
-    symptoms: List[str]
-    food_restrictions: List[str]
-    mental_conditions: List[str]
-    daily_symptoms: List[str]
+    conditions: str
+    daily_symptoms: str
 
 class ChatQuery(BaseModel):
     query: str
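With the slimmed-down model, `/api/resources` now expects just two free-text strings. A minimal client-side sketch of the new payload shape; the path and field names come from this diff, while the base URL and field values are illustrative assumptions:

```python
import requests

# Illustrative request against a locally running instance of this app (URL assumed).
payload = {
    "conditions": "type 2 diabetes, hypertension",      # example text, not from the repo
    "daily_symptoms": "fatigue, occasional dizziness",  # example text, not from the repo
}
response = requests.post("http://localhost:8000/api/resources", json=payload)
print(response.json())  # expected shape per the diff: {"resources": [...], "success": True}
```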
@@ -250,6 +248,18 @@ def query_embeddings(query_embedding, embeddings_data=None, n_results=5):
         print(f"Error in query_embeddings: {e}")
         return []
 
+def get_page_title(url):
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            soup = BeautifulSoup(response.text, 'html.parser')
+            title = soup.find('title')
+            return title.get_text() if title else "No title found"
+        else:
+            return None
+    except requests.exceptions.RequestException:
+        return None
+
 def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded_articles'):
     texts = []
     for doc_id in doc_ids:
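Note that the new `get_page_title` helper calls `BeautifulSoup`, while this commit only adds `import requests`. A self-contained sketch of the helper, assuming `beautifulsoup4` is installed and imported elsewhere in app.py; the request timeout is an extra precaution, not part of the committed code:

```python
import requests
from bs4 import BeautifulSoup  # required by the helper but not imported in this diff

def get_page_title(url, timeout=10):
    """Fetch a page and return its <title> text, or None if the request fails."""
    try:
        response = requests.get(url, timeout=timeout)  # timeout is an assumption
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            title = soup.find("title")
            return title.get_text() if title else "No title found"
        return None
    except requests.exceptions.RequestException:
        return None
```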
@@ -520,7 +530,6 @@ async def chat_endpoint(chat_query: ChatQuery):
         folder_path = 'downloaded_articles/downloaded_articles'
         initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
         document_ids = [doc_id for doc_id, _ in initial_results]
-        document_ids = [doc_id for doc_id, _ in initial_results]
         document_texts = retrieve_document_texts(document_ids, folder_path)
         cross_encoder = models['cross_encoder']
         scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
@@ -557,46 +566,33 @@ async def chat_endpoint(chat_query: ChatQuery):
 
 @app.post("/api/resources")
 async def resources_endpoint(profile: MedicalProfile):
-    try:
-        # Validate profile input
-        if not profile.chronic_conditions or not profile.daily_symptoms or not profile.food_restrictions or not profile.mental_conditions:
-            raise ValueError("Incomplete profile data provided.")
-
-        # Build context
-        context = f"""
-        Medical conditions: {', '.join(profile.chronic_conditions)}
-        Current symptoms: {', '.join(profile.daily_symptoms)}
-        Restrictions: {', '.join(profile.food_restrictions)}
-        Mental health: {', '.join(profile.mental_conditions)}
-        """
-        query_text = context
-
-        # Embed query and fetch embeddings
-        query_embedding = embed_query_text(query_text)
-        if query_embedding is None:
-            raise ValueError("Query embedding generation failed.")
-
-        embeddings_data = load_embeddings()
-        if not embeddings_data:
-            raise ValueError("Failed to load embeddings data.")
-
+    query_text = MedicalProfile.conditions + MedicalProfile.daily_symptoms
+    query_embedding = embed_query_text(query_text)  # Embed the query text
+    embeddings_data = load_embeddings()
         folder_path = 'downloaded_articles/downloaded_articles'
-        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
-        if not initial_results:
-            raise ValueError("No initial results found from query_embeddings.")
-
-        # Retrieve document texts
+    initial_results = query_embeddings(query_embedding, embeddings_data, n_results=6)
+    file_path = 'finalcleaned_excel_file.xlsx'
+    df = pd.read_excel(file_path)
+    file_name_to_url = {f"article_{index}.html": url for index, url in enumerate(df['Unnamed: 0'])}
+    file_names = document_ids
+
+    # Retrieve original URLs
+    for file_name in file_names:
+        original_url = file_name_to_url.get(file_name, None)
+        if original_url:
+            title = get_page_title(original_url)
+            if title:
+                print(f"Title: {title},URL: {original_url}")
+            else:
+                print(f"Name: {file_name}")
+        else:
+            print(f"Name: {file_name}")
         document_ids = [doc_id for doc_id, _ in initial_results]
         document_texts = retrieve_document_texts(document_ids, folder_path)
-        if not document_texts or any(doc is None for doc in document_texts):
-            raise ValueError("Failed to retrieve valid document texts.")
-
-        # Perform reranking
         cross_encoder = models['cross_encoder']
         scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
         scored_documents = list(zip(scores, document_ids, document_texts))
         scored_documents.sort(key=lambda x: x[0], reverse=True)
-
         # Build resources response
         resources = []
         for score, doc_id, text in scored_documents[:10]:
@@ -610,9 +606,7 @@ async def resources_endpoint(profile: MedicalProfile):
                 "content": text[:200],
                 "score": float(score)
             })
-
         return {"resources": resources, "success": True}
-
     except ValueError as ve:
         # Handle expected errors gracefully
         raise HTTPException(status_code=400, detail=str(ve))
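For reference, a sketch of how the rewritten endpoint could hang together, under a few assumptions: the existing helpers in app.py (`embed_query_text`, `load_embeddings`, `query_embeddings`, `retrieve_document_texts`, `models`, `get_page_title`) behave as they do elsewhere in the file, profile fields are read from the request instance rather than the `MedicalProfile` class, `document_ids` is derived before it is used for the URL lookup, and a `try` block is restored so the retained `except ValueError` handler still pairs with something. Resource fields other than `content` and `score` are not visible in this diff, so they are placeholders:

```python
@app.post("/api/resources")
async def resources_endpoint(profile: MedicalProfile):
    try:
        # Read the two free-text fields from the request instance, not the class.
        query_text = profile.conditions + " " + profile.daily_symptoms
        query_embedding = embed_query_text(query_text)
        embeddings_data = load_embeddings()
        folder_path = 'downloaded_articles/downloaded_articles'
        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=6)
        document_ids = [doc_id for doc_id, _ in initial_results]

        # Map stored article file names back to their source URLs via the spreadsheet index.
        df = pd.read_excel('finalcleaned_excel_file.xlsx')
        file_name_to_url = {f"article_{index}.html": url
                            for index, url in enumerate(df['Unnamed: 0'])}
        for file_name in document_ids:
            original_url = file_name_to_url.get(file_name)
            if original_url:
                title = get_page_title(original_url)
                print(f"Title: {title}, URL: {original_url}" if title else f"Name: {file_name}")
            else:
                print(f"Name: {file_name}")

        # Rerank the retrieved documents with the cross-encoder, as in the diff.
        document_texts = retrieve_document_texts(document_ids, folder_path)
        cross_encoder = models['cross_encoder']
        scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
        scored_documents = list(zip(scores, document_ids, document_texts))
        scored_documents.sort(key=lambda x: x[0], reverse=True)

        resources = []
        for score, doc_id, text in scored_documents[:10]:
            resources.append({
                "doc_id": doc_id,        # placeholder field; the diff only shows content and score
                "content": text[:200],
                "score": float(score),
            })
        return {"resources": resources, "success": True}
    except ValueError as ve:
        # Handle expected errors gracefully
        raise HTTPException(status_code=400, detail=str(ve))
```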
 