Spaces:

nomadicsynth
/

inkling

Running on Zero

App Files Community

nomadicsynth committed on May 7

Commit

7177172

1 Parent(s): d5b5c7a

Refactor embedding model integration and update find_synergistic_papers calls to use dataset parameter

Browse files

Files changed (1) hide show

app.py +14 -13

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ from arxiv_stuff import ARXIV_CATEGORIES_FLAT
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Login to Hugging Face Hub
-hf_hub_login(token=HF_TOKEN, add_to_git_credential=True)
 # Check if using persistent storage
 persistent_storage = os.path.exists("/data")
@@ -142,6 +142,16 @@ def init_embedding_model(model_name_or_path: str, model_revision: str = None) ->
     )
 def init_reasoning_model(model_name: str) -> InferenceClient:
     global reasoning_model
     reasoning_model = InferenceClient(
@@ -271,16 +281,6 @@ def generate(messages: list[dict[str, str]]) -> str:
     return output
-@spaces.GPU
-def embed_text(text: str | list[str]) -> torch.Tensor:
-    global embedding_model
-    # Strip any leading/trailing whitespace
-    text = text.strip() if isinstance(text, str) else [t.strip() for t in text]
-    embed_text = embedding_model.encode(text, normalize_embeddings=True)  # Ensure vectors are normalized
-    return embed_text
 def analyse_abstracts(query_abstract: str, compare_abstract: dict) -> str:
     """Analyze the relationship between two abstracts and return formatted analysis"""
     global reasoning_model
@@ -464,7 +464,7 @@ def find_synergistic_papers(abstract: str, limit=25) -> list[dict]:
 def format_search_results_json(abstract: str) -> str:
     """Format search results as JSON for display"""
     # Find papers synergistic with the given abstract
-    papers = find_synergistic_papers(abstract)
     # Convert to JSON for display
     json_output = json.dumps(papers, indent=2)
@@ -475,7 +475,8 @@ def format_search_results_json(abstract: str) -> str:
 def format_search_results(abstract: str) -> tuple[pd.DataFrame, list[dict]]:
     """Format search results as a DataFrame for display"""
     # Find papers synergistic with the given abstract
-    papers = find_synergistic_papers(abstract)
     # Convert to DataFrame for display
     df = pd.DataFrame(

 HF_TOKEN = os.getenv("HF_TOKEN")
 # Login to Hugging Face Hub
+# hf_hub_login(token=HF_TOKEN, add_to_git_credential=True)
 # Check if using persistent storage
 persistent_storage = os.path.exists("/data")
     )
+@spaces.GPU
+def embed_text(text: str | list[str]) -> torch.Tensor:
+    global embedding_model
+    # Strip any leading/trailing whitespace
+    text = text.strip() if isinstance(text, str) else [t.strip() for t in text]
+    embed_text = embedding_model.encode(text, normalize_embeddings=True)  # Ensure vectors are normalized
+    return embed_text
 def init_reasoning_model(model_name: str) -> InferenceClient:
     global reasoning_model
     reasoning_model = InferenceClient(
     return output
 def analyse_abstracts(query_abstract: str, compare_abstract: dict) -> str:
     """Analyze the relationship between two abstracts and return formatted analysis"""
     global reasoning_model
 def format_search_results_json(abstract: str) -> str:
     """Format search results as JSON for display"""
     # Find papers synergistic with the given abstract
+    papers = embedding_model.find_synergistic_papers(dataset, abstract)
     # Convert to JSON for display
     json_output = json.dumps(papers, indent=2)
 def format_search_results(abstract: str) -> tuple[pd.DataFrame, list[dict]]:
     """Format search results as a DataFrame for display"""
     # Find papers synergistic with the given abstract
+    # papers = embedding_model.find_synergistic_papers(abstract)
+    papers = embedding_model.find_synergistic_papers(dataset, abstract)
     # Convert to DataFrame for display
     df = pd.DataFrame(