Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,12 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
|
3 |
import datetime
|
4 |
-
import operator
|
5 |
import pandas as pd
|
6 |
import tqdm.auto
|
7 |
from apscheduler.schedulers.background import BackgroundScheduler
|
8 |
from huggingface_hub import HfApi
|
9 |
|
10 |
import gradio as gr
|
11 |
-
from gradio_calendar import Calendar
|
12 |
-
import datasets
|
13 |
-
import requests
|
14 |
|
15 |
from datetime import timezone # Ensure timezone is imported
|
16 |
|
@@ -95,7 +91,7 @@ class Prettifier:
|
|
95 |
|
96 |
class PaperList:
|
97 |
"""
|
98 |
-
Manages the list of papers
|
99 |
"""
|
100 |
COLUMN_INFO = [
|
101 |
["arxiv_id", "str"], # Added arxiv_id
|
@@ -121,27 +117,11 @@ class PaperList:
|
|
121 |
def column_datatype(self):
|
122 |
return [col[1] for col in self.COLUMN_INFO]
|
123 |
|
124 |
-
def
|
125 |
-
self,
|
126 |
-
title_search_query: str,
|
127 |
-
max_num_to_retrieve: int = 1000, # Set a high default to include all if not specified
|
128 |
-
) -> pd.DataFrame:
|
129 |
"""
|
130 |
-
|
131 |
"""
|
132 |
-
|
133 |
-
|
134 |
-
# Filter by title if search query is provided
|
135 |
-
if title_search_query:
|
136 |
-
df = df[df["title"].str.contains(title_search_query, case=False, na=False)]
|
137 |
-
|
138 |
-
# Limit the number of papers to retrieve if max_num_to_retrieve is set
|
139 |
-
if max_num_to_retrieve:
|
140 |
-
df = df.head(max_num_to_retrieve)
|
141 |
-
|
142 |
-
# Prettify the DataFrame
|
143 |
-
df_prettified = self._prettifier(df).loc[:, self.column_names]
|
144 |
-
return df_prettified
|
145 |
|
146 |
|
147 |
# --- Sorting and Pagination Management ---
|
@@ -154,7 +134,6 @@ class PaperManager:
|
|
154 |
self.paper_list = paper_list
|
155 |
self.papers_per_page = papers_per_page
|
156 |
self.sort_method = "hot" # Default sort method
|
157 |
-
self.current_search_query = "" # Initialize with no search query
|
158 |
self.top_time_frame = "all time" # Default time frame for "Top" sorting
|
159 |
self.sort_papers()
|
160 |
# 'current_page' and 'total_pages' are set in 'sort_papers()'
|
@@ -181,14 +160,10 @@ class PaperManager:
|
|
181 |
|
182 |
def sort_papers(self):
|
183 |
"""
|
184 |
-
Sorts the papers based on the current sort method
|
185 |
"""
|
186 |
df = self.paper_list.df_raw.copy()
|
187 |
|
188 |
-
# Apply search filter if a search query exists
|
189 |
-
if self.current_search_query:
|
190 |
-
df = df[df["title"].str.contains(self.current_search_query, case=False, na=False)]
|
191 |
-
|
192 |
if self.sort_method == "hot":
|
193 |
if not df.empty:
|
194 |
df = df.drop(columns=['score'], errors='ignore') # Remove existing 'score' column if present
|
@@ -242,15 +217,6 @@ class PaperManager:
|
|
242 |
self.sort_papers()
|
243 |
return True # Assume success
|
244 |
|
245 |
-
def set_search_query(self, query: str):
|
246 |
-
"""
|
247 |
-
Sets the current search query and re-sorts the papers.
|
248 |
-
"""
|
249 |
-
print(f"Setting search query to: {query}")
|
250 |
-
self.current_search_query = query
|
251 |
-
self.sort_papers()
|
252 |
-
return True # Assume success
|
253 |
-
|
254 |
def get_current_page_papers(self) -> str:
|
255 |
"""
|
256 |
Retrieves the HTML string of the current page's papers.
|
@@ -399,22 +365,6 @@ def refresh_papers_ui() -> str:
|
|
399 |
return paper_manager.refresh()
|
400 |
|
401 |
|
402 |
-
def search_papers_ui(query: str) -> str:
|
403 |
-
"""
|
404 |
-
Searches for papers based on the title search query.
|
405 |
-
"""
|
406 |
-
paper_manager.set_search_query(query)
|
407 |
-
return paper_manager.get_current_page_papers()
|
408 |
-
|
409 |
-
|
410 |
-
def clear_search_ui() -> str:
|
411 |
-
"""
|
412 |
-
Clears the current search query and refreshes the paper list.
|
413 |
-
"""
|
414 |
-
paper_manager.set_search_query("")
|
415 |
-
return paper_manager.get_current_page_papers()
|
416 |
-
|
417 |
-
|
418 |
# --- CSS Styling ---
|
419 |
|
420 |
css = """
|
@@ -592,16 +542,6 @@ with demo:
|
|
592 |
</tr>
|
593 |
</table>
|
594 |
""")
|
595 |
-
# Search Bar and Clear Search Button
|
596 |
-
with gr.Row():
|
597 |
-
search_box = gr.Textbox(
|
598 |
-
label="Search Papers by Title",
|
599 |
-
placeholder="Enter keywords to search...",
|
600 |
-
lines=1,
|
601 |
-
interactive=True
|
602 |
-
)
|
603 |
-
search_button = gr.Button("Search")
|
604 |
-
clear_search_button = gr.Button("Clear Search")
|
605 |
# Sort Options and Time Frame (conditionally visible)
|
606 |
with gr.Row():
|
607 |
sort_radio = gr.Radio(
|
@@ -667,20 +607,13 @@ with demo:
|
|
667 |
outputs=[paper_list]
|
668 |
)
|
669 |
|
670 |
-
#
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
)
|
676 |
-
|
677 |
-
# Clear search functionality
|
678 |
-
clear_search_button.click(
|
679 |
-
fn=clear_search_ui,
|
680 |
-
inputs=None,
|
681 |
-
outputs=[paper_list]
|
682 |
-
)
|
683 |
-
|
684 |
|
685 |
|
686 |
# --- Launch the App ---
|
|
|
1 |
#!/usr/bin/env python
|
2 |
|
3 |
import datetime
|
|
|
4 |
import pandas as pd
|
5 |
import tqdm.auto
|
6 |
from apscheduler.schedulers.background import BackgroundScheduler
|
7 |
from huggingface_hub import HfApi
|
8 |
|
9 |
import gradio as gr
|
|
|
|
|
|
|
10 |
|
11 |
from datetime import timezone # Ensure timezone is imported
|
12 |
|
|
|
91 |
|
92 |
class PaperList:
|
93 |
"""
|
94 |
+
Manages the list of papers.
|
95 |
"""
|
96 |
COLUMN_INFO = [
|
97 |
["arxiv_id", "str"], # Added arxiv_id
|
|
|
117 |
def column_datatype(self):
|
118 |
return [col[1] for col in self.COLUMN_INFO]
|
119 |
|
120 |
+
def get_prettified_df(self) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
121 |
"""
|
122 |
+
Returns the prettified DataFrame.
|
123 |
"""
|
124 |
+
return self.df_prettified
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
|
127 |
# --- Sorting and Pagination Management ---
|
|
|
134 |
self.paper_list = paper_list
|
135 |
self.papers_per_page = papers_per_page
|
136 |
self.sort_method = "hot" # Default sort method
|
|
|
137 |
self.top_time_frame = "all time" # Default time frame for "Top" sorting
|
138 |
self.sort_papers()
|
139 |
# 'current_page' and 'total_pages' are set in 'sort_papers()'
|
|
|
160 |
|
161 |
def sort_papers(self):
|
162 |
"""
|
163 |
+
Sorts the papers based on the current sort method.
|
164 |
"""
|
165 |
df = self.paper_list.df_raw.copy()
|
166 |
|
|
|
|
|
|
|
|
|
167 |
if self.sort_method == "hot":
|
168 |
if not df.empty:
|
169 |
df = df.drop(columns=['score'], errors='ignore') # Remove existing 'score' column if present
|
|
|
217 |
self.sort_papers()
|
218 |
return True # Assume success
|
219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
def get_current_page_papers(self) -> str:
|
221 |
"""
|
222 |
Retrieves the HTML string of the current page's papers.
|
|
|
365 |
return paper_manager.refresh()
|
366 |
|
367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
368 |
# --- CSS Styling ---
|
369 |
|
370 |
css = """
|
|
|
542 |
</tr>
|
543 |
</table>
|
544 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
# Sort Options and Time Frame (conditionally visible)
|
546 |
with gr.Row():
|
547 |
sort_radio = gr.Radio(
|
|
|
607 |
outputs=[paper_list]
|
608 |
)
|
609 |
|
610 |
+
# Footer
|
611 |
+
gr.Markdown("""
|
612 |
+
Related useful Spaces:
|
613 |
+
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
|
614 |
+
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
|
615 |
+
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
|
616 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
|
618 |
|
619 |
# --- Launch the App ---
|