Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,8 @@ import arxiv
|
|
| 17 |
import scholarly
|
| 18 |
import pymed
|
| 19 |
import wikipedia
|
| 20 |
-
from newspaper import Article
|
|
|
|
| 21 |
import pickle
|
| 22 |
import faiss
|
| 23 |
import threading
|
|
@@ -33,9 +34,9 @@ if not HF_API_KEY:
|
|
| 33 |
|
| 34 |
client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
|
| 35 |
|
| 36 |
-
MAIN_LLM_MODEL = "
|
| 37 |
-
REASONING_LLM_MODEL = "
|
| 38 |
-
CRITIC_LLM_MODEL = "
|
| 39 |
ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
|
| 40 |
|
| 41 |
MAX_ITERATIONS = 20
|
|
@@ -158,7 +159,7 @@ def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str
|
|
| 158 |
|
| 159 |
results = [r for r in ddgs.text(**kwargs)]
|
| 160 |
if results:
|
| 161 |
-
return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
|
| 162 |
else:
|
| 163 |
if time_filter and "time" in kwargs:
|
| 164 |
del kwargs["time"]
|
|
@@ -465,9 +466,8 @@ def tool_identify_focus_areas(prompt: str, insights: list = [],
|
|
| 465 |
return []
|
| 466 |
|
| 467 |
def add_to_faiss_index(text: str):
|
| 468 |
-
"""Adds the embedding of the given text to the FAISS index."""
|
| 469 |
embedding = document_similarity_model.encode(text, convert_to_tensor=True)
|
| 470 |
-
embedding_np = embedding.cpu().numpy().reshape(1, -1)
|
| 471 |
if embedding_np.shape[1] != embedding_dim:
|
| 472 |
logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
|
| 473 |
return
|
|
@@ -475,12 +475,11 @@ def add_to_faiss_index(text: str):
|
|
| 475 |
index.add(embedding_np)
|
| 476 |
|
| 477 |
def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
|
| 478 |
-
"""Searches the FAISS index for the most similar texts to the query."""
|
| 479 |
query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
|
| 480 |
query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
|
| 481 |
faiss.normalize_L2(query_embedding_np)
|
| 482 |
distances, indices = index.search(query_embedding_np, top_k)
|
| 483 |
-
return indices[0].tolist()
|
| 484 |
|
| 485 |
def filter_results(search_results, prompt, previous_snippets=None):
|
| 486 |
if not main_similarity_model or not search_results:
|
|
@@ -507,7 +506,6 @@ def filter_results(search_results, prompt, previous_snippets=None):
|
|
| 507 |
result['relevance_score'] = cosine_score
|
| 508 |
filtered_results.append(result)
|
| 509 |
seen_snippets.add(result['snippet'])
|
| 510 |
-
# Add snippet to FAISS index
|
| 511 |
add_to_faiss_index(result['snippet'])
|
| 512 |
|
| 513 |
|
|
|
|
| 17 |
import scholarly
|
| 18 |
import pymed
|
| 19 |
import wikipedia
|
| 20 |
+
#from newspaper import Article # Removed direct import
|
| 21 |
+
from newspaper3k import Article # Import from newspaper3k
|
| 22 |
import pickle
|
| 23 |
import faiss
|
| 24 |
import threading
|
|
|
|
| 34 |
|
| 35 |
client = InferenceClient(provider="hf-inference", api_key=HF_API_KEY)
|
| 36 |
|
| 37 |
+
MAIN_LLM_MODEL = "meta-llama/Llama-3-70b-instruct"
|
| 38 |
+
REASONING_LLM_MODEL = "anthropic/claude-3-opus-20240229"
|
| 39 |
+
CRITIC_LLM_MODEL = "google/gemini-1.5-pro"
|
| 40 |
ENSEMBLE_MODELS = [MAIN_LLM_MODEL, REASONING_LLM_MODEL, CRITIC_LLM_MODEL]
|
| 41 |
|
| 42 |
MAX_ITERATIONS = 20
|
|
|
|
| 159 |
|
| 160 |
results = [r for r in ddgs.text(**kwargs)]
|
| 161 |
if results:
|
| 162 |
+
return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
|
| 163 |
else:
|
| 164 |
if time_filter and "time" in kwargs:
|
| 165 |
del kwargs["time"]
|
|
|
|
| 466 |
return []
|
| 467 |
|
| 468 |
def add_to_faiss_index(text: str):
|
|
|
|
| 469 |
embedding = document_similarity_model.encode(text, convert_to_tensor=True)
|
| 470 |
+
embedding_np = embedding.cpu().numpy().reshape(1, -1)
|
| 471 |
if embedding_np.shape[1] != embedding_dim:
|
| 472 |
logger.error(f"Embedding dimension mismatch: expected {embedding_dim}, got {embedding_np.shape[1]}")
|
| 473 |
return
|
|
|
|
| 475 |
index.add(embedding_np)
|
| 476 |
|
| 477 |
def search_faiss_index(query: str, top_k: int = 5) -> List[str]:
|
|
|
|
| 478 |
query_embedding = document_similarity_model.encode(query, convert_to_tensor=True)
|
| 479 |
query_embedding_np = query_embedding.cpu().numpy().reshape(1, -1)
|
| 480 |
faiss.normalize_L2(query_embedding_np)
|
| 481 |
distances, indices = index.search(query_embedding_np, top_k)
|
| 482 |
+
return indices[0].tolist()
|
| 483 |
|
| 484 |
def filter_results(search_results, prompt, previous_snippets=None):
|
| 485 |
if not main_similarity_model or not search_results:
|
|
|
|
| 506 |
result['relevance_score'] = cosine_score
|
| 507 |
filtered_results.append(result)
|
| 508 |
seen_snippets.add(result['snippet'])
|
|
|
|
| 509 |
add_to_faiss_index(result['snippet'])
|
| 510 |
|
| 511 |
|