akhaliq HF staff committed on
Commit
1714fcd
·
verified ·
1 Parent(s): 4f9c2ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -196
app.py CHANGED
@@ -166,38 +166,6 @@ class PaperList:
166
  return df_prettified
167
 
168
 
169
- # Initialize PaperList
170
- paper_list = PaperList(get_df())
171
-
172
-
173
- def update_paper_list() -> None:
174
- global paper_list
175
- paper_list = PaperList(get_df())
176
-
177
-
178
- # Scheduler for updating paper list every hour
179
- scheduler_data = BackgroundScheduler()
180
- scheduler_data.add_job(
181
- func=update_paper_list,
182
- trigger="cron",
183
- minute=0, # Every hour at minute 0
184
- timezone="UTC",
185
- misfire_grace_time=60,
186
- )
187
- scheduler_data.start()
188
-
189
-
190
- # --- Gradio App ---
191
-
192
- DESCRIPTION = "# [Daily Papers](https://huggingface.co/papers)"
193
-
194
- FOOT_NOTE = """\
195
- Related useful Spaces:
196
- - [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
197
- - [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
198
- - [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
199
- """
200
-
201
  # --- Sorting and Pagination Management ---
202
 
203
  class PaperManager:
@@ -205,18 +173,20 @@ class PaperManager:
205
  self.paper_list = paper_list
206
  self.papers_per_page = papers_per_page
207
  self.current_page = 1
208
- self.total_pages = max((len(self.paper_list.df_raw) + self.papers_per_page - 1) // self.papers_per_page, 1)
209
  self.sort_method = "hot" # Default sort method
 
 
210
 
211
- def calculate_score(self, paper):
212
  """
213
  Calculate the score of a paper based on upvotes and age.
214
  This mimics the "hotness" algorithm used by platforms like Hacker News.
215
  """
216
- upvotes = paper.get('upvotes', 0)
217
- published_at_str = paper.get('date', datetime.datetime.now(timezone.utc).isoformat())
218
  try:
219
- published_time = datetime.datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
 
220
  except ValueError:
221
  # If parsing fails, use current time to minimize the impact on sorting
222
  published_time = datetime.datetime.now(datetime.timezone.utc)
@@ -252,78 +222,109 @@ class PaperManager:
252
  self.sort_papers()
253
  return True # Assume success
254
 
255
- def get_current_page_papers(self):
256
  start = (self.current_page - 1) * self.papers_per_page
257
  end = start + self.papers_per_page
258
  current_papers = self.paper_list.df_prettified.iloc[start:end]
259
- return current_papers
260
 
261
- def next_page(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  if self.current_page < self.total_pages:
263
  self.current_page += 1
264
  return self.get_current_page_papers()
265
 
266
- def prev_page(self):
267
  if self.current_page > 1:
268
  self.current_page -= 1
269
  return self.get_current_page_papers()
270
 
271
- def refresh(self):
272
  self.sort_papers()
273
  return self.get_current_page_papers()
274
 
275
 
276
- # Initialize PaperManager
277
- paper_manager = PaperManager(paper_list)
 
 
 
 
 
 
 
278
 
279
 
280
- def refresh_paper_manager():
281
  global paper_manager
282
- paper_manager = PaperManager(paper_list)
283
- if paper_manager.sort_method:
284
- paper_manager.sort_papers()
285
  return paper_manager.get_current_page_papers()
286
 
287
 
288
- # --- Gradio Interface Functions ---
289
-
290
- def update_num_papers(current_df: pd.DataFrame) -> str:
291
- return f"{len(current_df)} / {len(paper_manager.paper_list.df_raw)}"
292
-
293
-
294
- def perform_search(
295
- start_date: datetime.datetime,
296
- end_date: datetime.datetime,
297
- search_title: str,
298
- search_abstract: str,
299
- max_num_to_retrieve: int,
300
- sort_method: str
301
- ) -> pd.DataFrame:
302
- # Update sort method
303
- paper_manager.set_sort_method(sort_method.lower())
304
-
305
- # Perform search
306
- searched_df = paper_manager.paper_list.search(start_date, end_date, search_title, search_abstract, max_num_to_retrieve)
307
-
308
- # Update PaperList with searched results
309
- paper_manager.paper_list.df_raw = searched_df.copy()
310
- paper_manager.paper_list.df_prettified = paper_manager.paper_list._prettifier(searched_df).loc[:, paper_manager.paper_list.column_names]
311
- paper_manager.total_pages = max((len(searched_df) + paper_manager.papers_per_page - 1) // paper_manager.papers_per_page, 1)
312
- paper_manager.current_page = 1
313
-
314
- # Apply sorting
315
- paper_manager.sort_papers()
316
 
317
- return paper_manager.get_current_page_papers()
318
 
 
319
 
320
- def change_sort_method(method: str) -> pd.DataFrame:
321
  paper_manager.set_sort_method(method.lower())
322
  return paper_manager.get_current_page_papers()
323
 
324
 
325
- def get_initial_papers() -> pd.DataFrame:
326
- return paper_manager.get_current_page_papers()
327
 
328
 
329
  # --- CSS Styling ---
@@ -504,7 +505,7 @@ with demo:
504
  </table>
505
  """)
506
 
507
- # Sorting Options
508
  with gr.Row():
509
  sort_radio = gr.Radio(
510
  choices=["Hot", "New"],
@@ -513,125 +514,48 @@ with demo:
513
  interactive=True
514
  )
515
 
516
- # Search and Filter Inputs
517
- with gr.Group():
518
- search_title = gr.Textbox(label="Search Title")
519
- with gr.Row():
520
- with gr.Column(scale=4):
521
- search_abstract = gr.Textbox(
522
- label="Search Abstract",
523
- info="The result may not be accurate as the abstract does not contain all the information.",
524
- )
525
- with gr.Column(scale=1):
526
- max_num_to_retrieve = gr.Slider(
527
- label="Max Number to Retrieve",
528
- info="This is used only for search on abstracts.",
529
- minimum=1,
530
- maximum=1000, # Adjust as needed
531
- step=1,
532
- value=100,
533
- )
534
- with gr.Row():
535
- start_date = Calendar(label="Start Date", type="date", value="2023-05-05")
536
- end_date = Calendar(label="End Date", type="date", value=datetime.datetime.utcnow().strftime("%Y-%m-%d"))
537
-
538
- search_button = gr.Button("Search")
539
-
540
- # Number of Papers Display
541
- num_papers = gr.Textbox(label="Number of Papers", value=update_num_papers(paper_manager.get_current_page_papers()), interactive=False)
542
-
543
- # Paper List Display
544
- df_display = gr.DataFrame(
545
- value=paper_manager.get_current_page_papers(),
546
- datatype=paper_manager.paper_list.column_datatype,
547
- type="pandas",
548
- interactive=False,
549
- height=600,
550
- elem_id="table",
551
- column_widths=["10%", "10%", "60%", "10%", "5%", "5%"],
552
- wrap=True,
553
- )
554
-
555
- # Pagination Buttons
556
  with gr.Row():
557
  prev_button = gr.Button("Prev")
558
  next_button = gr.Button("Next")
559
 
560
- # Footer
561
- gr.Markdown(FOOT_NOTE)
562
-
563
- # Hidden Refresh Button
564
- refresh_button = gr.Button("Refresh", visible=False, elem_id="refresh-hidden")
565
- refresh_button.click(refresh_paper_manager, outputs=[df_display])
566
-
567
- # Bind the visible Refresh button to the hidden one using JavaScript
568
- gr.HTML("""
569
- <script>
570
- document.getElementById('refresh-button').addEventListener('click', function() {
571
- document.getElementById('refresh-hidden').click();
572
- });
573
- </script>
574
- """)
575
-
576
- # Event Handlers
577
-
578
- # Search Button Click
579
- search_button.click(
580
- fn=perform_search,
581
- inputs=[start_date, end_date, search_title, search_abstract, max_num_to_retrieve, sort_radio],
582
- outputs=[df_display],
583
- ).then(
584
- fn=update_num_papers,
585
- inputs=df_display,
586
- outputs=num_papers,
587
- queue=False,
588
- )
589
-
590
- # Sort Radio Change
591
- sort_radio.change(
592
- fn=change_sort_method,
593
- inputs=[sort_radio],
594
- outputs=[df_display],
595
- ).then(
596
- fn=update_num_papers,
597
- inputs=df_display,
598
- outputs=num_papers,
599
- queue=False,
600
- )
601
-
602
- # Pagination Buttons
603
- prev_button.click(
604
- fn=paper_manager.prev_page,
605
- inputs=None,
606
- outputs=[df_display],
607
- ).then(
608
- fn=update_num_papers,
609
- inputs=df_display,
610
- outputs=num_papers,
611
- queue=False,
612
- )
613
-
614
- next_button.click(
615
- fn=paper_manager.next_page,
616
- inputs=None,
617
- outputs=[df_display],
618
- ).then(
619
- fn=update_num_papers,
620
- inputs=df_display,
621
- outputs=num_papers,
622
- queue=False,
623
- )
624
-
625
- # Initial Load
626
- demo.load(
627
- fn=get_initial_papers,
628
- outputs=[df_display],
629
- ).then(
630
- fn=update_num_papers,
631
- inputs=df_display,
632
- outputs=num_papers,
633
- queue=False,
634
- )
635
 
636
  # --- Launch the App ---
637
 
 
166
  return df_prettified
167
 
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  # --- Sorting and Pagination Management ---
170
 
171
  class PaperManager:
 
173
  self.paper_list = paper_list
174
  self.papers_per_page = papers_per_page
175
  self.current_page = 1
 
176
  self.sort_method = "hot" # Default sort method
177
+ self.total_pages = max((len(self.paper_list.df_raw) + self.papers_per_page - 1) // self.papers_per_page, 1)
178
+ self.sort_papers()
179
 
180
+ def calculate_score(self, row):
181
  """
182
  Calculate the score of a paper based on upvotes and age.
183
  This mimics the "hotness" algorithm used by platforms like Hacker News.
184
  """
185
+ upvotes = row.get('👍', 0)
186
+ published_at_str = row.get('date', datetime.datetime.now(timezone.utc).isoformat())
187
  try:
188
+ published_time = datetime.datetime.strptime(published_at_str, "%Y-%m-%d")
189
+ published_time = published_time.replace(tzinfo=datetime.timezone.utc)
190
  except ValueError:
191
  # If parsing fails, use current time to minimize the impact on sorting
192
  published_time = datetime.datetime.now(datetime.timezone.utc)
 
222
  self.sort_papers()
223
  return True # Assume success
224
 
225
def get_current_page_papers(self) -> str:
    """Render the papers on the current page as an HTML table.

    Reads ``self.current_page`` and ``self.papers_per_page`` to slice
    ``self.paper_list.df_prettified`` positionally, then formats every row
    with ``self.format_paper``.

    Returns:
        An HTML ``<table>`` fragment, or a placeholder ``<div>`` when the
        slice for the current page is empty.
    """
    start = (self.current_page - 1) * self.papers_per_page
    end = start + self.papers_per_page
    current_papers = self.paper_list.df_prettified.iloc[start:end]

    if current_papers.empty:
        return "<div class='no-papers'>No papers available for this page.</div>"

    # Rank by position in the paginated listing. The index label yielded by
    # iterrows() is not page-relative, so it must not be added to `start`.
    papers_html = "".join(
        self.format_paper(row, rank)
        for rank, (_, row) in enumerate(current_papers.iterrows(), start=start + 1)
    )
    return f"""
        <table border="0" cellpadding="0" cellspacing="0" class="itemlist">
            {papers_html}
        </table>
        """
239
+
240
def format_paper(self, row, rank):
    """Render one paper as Hacker-News-style HTML table rows.

    Args:
        row: Mapping-like record (e.g. a pandas Series) read via ``.get``
            for the keys ``title``, ``arxiv_id``, ``👍``, ``💬`` and ``date``.
        rank: Number displayed in front of the title.

    Returns:
        An HTML fragment made of ``<tr>`` elements for this paper.
    """
    from html import escape  # local import keeps the block self-contained

    # Values come from an external dataset, so escape them before they are
    # placed into markup.
    # NOTE(review): assumes `title` is plain text, not pre-rendered HTML — confirm.
    title = escape(str(row.get('title', 'No title')))
    paper_id = row.get('arxiv_id', '')
    url = escape(f"https://huggingface.co/papers/{paper_id}", quote=True)
    authors = 'Unknown'  # Assuming authors are not present in the current dataset
    upvotes = row.get('👍', 0)
    comments = row.get('💬', 0)

    now = datetime.datetime.now(datetime.timezone.utc)
    published_time_str = row.get('date', now.strftime("%Y-%m-%d"))
    try:
        published_time = datetime.datetime.strptime(
            published_time_str, "%Y-%m-%d"
        ).replace(tzinfo=datetime.timezone.utc)
    except (TypeError, ValueError):
        # Non-string or malformed dates fall back to "now" rather than crashing.
        published_time = now

    time_ago_days = (now - published_time).days
    if time_ago_days <= 0:
        time_ago = "today"
    elif time_ago_days == 1:
        time_ago = "1 day ago"
    else:
        time_ago = f"{time_ago_days} days ago"

    return f"""
        <tr class="athing">
            <td align="right" valign="top" class="title"><span class="rank">{rank}.</span></td>
            <td valign="top" class="title">
                <a href="{url}" class="storylink" target="_blank">{title}</a>
            </td>
        </tr>
        <tr>
            <td colspan="1"></td>
            <td class="subtext">
                <span class="score">{upvotes} upvotes</span><br>
                authors: {authors} | {time_ago} | <a href="#">{comments} comments</a>
            </td>
        </tr>
        <tr style="height:5px"></tr>
        """
272
+
273
+ def next_page(self) -> str:
274
  if self.current_page < self.total_pages:
275
  self.current_page += 1
276
  return self.get_current_page_papers()
277
 
278
+ def prev_page(self) -> str:
279
  if self.current_page > 1:
280
  self.current_page -= 1
281
  return self.get_current_page_papers()
282
 
283
+ def refresh(self) -> str:
284
  self.sort_papers()
285
  return self.get_current_page_papers()
286
 
287
 
288
+ # Initialize PaperList and PaperManager
289
+ def initialize_paper_manager() -> PaperManager:
290
+ df = get_df()
291
+ paper_list = PaperList(df)
292
+ manager = PaperManager(paper_list)
293
+ return manager
294
+
295
+
296
+ paper_manager = initialize_paper_manager()
297
 
298
 
299
def update_paper_manager() -> str:
    """Reload the paper data into the shared ``paper_manager`` and re-render.

    Fetches a fresh DataFrame with ``get_df()``, wraps it in a new
    ``PaperList``, refreshes the pagination state, re-sorts, and returns the
    HTML for the current page.

    Returns:
        The HTML produced by ``paper_manager.get_current_page_papers()``.
    """
    # `paper_manager` is only mutated here, never rebound, so no `global`
    # declaration is required.
    df = get_df()
    paper_manager.paper_list = PaperList(df)

    # The refreshed data may have a different length: recompute the page
    # count with the same ceiling division the constructor uses, and pull the
    # current page back in range so it never points past the data.
    per_page = paper_manager.papers_per_page
    paper_manager.total_pages = max(
        (len(paper_manager.paper_list.df_raw) + per_page - 1) // per_page, 1
    )
    paper_manager.current_page = min(paper_manager.current_page, paper_manager.total_pages)

    paper_manager.sort_papers()
    return paper_manager.get_current_page_papers()
305
 
306
 
307
+ # Scheduler for updating paper list every hour
308
+ scheduler_data = BackgroundScheduler()
309
+ scheduler_data.add_job(
310
+ func=update_paper_manager,
311
+ trigger="cron",
312
+ minute=0, # Every hour at minute 0
313
+ timezone="UTC",
314
+ misfire_grace_time=60,
315
+ )
316
+ scheduler_data.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
 
 
318
 
319
+ # --- Gradio Interface Functions ---
320
 
321
+ def change_sort_method_ui(method: str) -> str:
322
  paper_manager.set_sort_method(method.lower())
323
  return paper_manager.get_current_page_papers()
324
 
325
 
326
+ def refresh_papers_ui() -> str:
327
+ return paper_manager.refresh()
328
 
329
 
330
  # --- CSS Styling ---
 
505
  </table>
506
  """)
507
 
508
+ # Sort Options
509
  with gr.Row():
510
  sort_radio = gr.Radio(
511
  choices=["Hot", "New"],
 
514
  interactive=True
515
  )
516
 
517
+ # Paper list
518
+ paper_list = gr.HTML()
519
+
520
+ # Navigation Buttons
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  with gr.Row():
522
  prev_button = gr.Button("Prev")
523
  next_button = gr.Button("Next")
524
 
525
+ # Load papers on app start
526
# The HTML component needs the rendered page markup. initialize_paper_manager
# returns a PaperManager object (and re-fetches the data on every page load),
# so bind the renderer of the shared manager instead.
demo.load(paper_manager.get_current_page_papers, outputs=[paper_list])
527
+
528
+ # Button clicks for pagination
529
+ prev_button.click(paper_manager.prev_page, outputs=[paper_list])
530
+ next_button.click(paper_manager.next_page, outputs=[paper_list])
531
+
532
+ # Refresh functionality
533
+ refresh_button = gr.Button("Refresh", visible=False, elem_id="refresh-hidden")
534
+ refresh_button.click(update_paper_manager, outputs=[paper_list])
535
+
536
+ # Bind the visible Refresh button to the hidden one using JavaScript
537
+ gr.HTML("""
538
+ <script>
539
+ document.getElementById('refresh-button').addEventListener('click', function() {
540
+ document.getElementById('refresh-hidden').click();
541
+ });
542
+ </script>
543
+ """)
544
+
545
+ # Sort option change
546
+ sort_radio.change(
547
+ fn=change_sort_method_ui,
548
+ inputs=[sort_radio],
549
+ outputs=[paper_list]
550
+ )
551
+
552
+ # Footer
553
+ gr.Markdown("""
554
+ Related useful Spaces:
555
+ - [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
556
+ - [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
557
+ - [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
558
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  # --- Launch the App ---
561