akhaliq HF staff committed on
Commit
157015b
·
verified ·
1 Parent(s): c80b1ae
Files changed (1) hide show
  1. app.py +39 -12
app.py CHANGED
@@ -13,6 +13,7 @@ import datasets
13
  import requests
14
 
15
  from datetime import timezone # Ensure timezone is imported
 
16
 
17
  # --- Data Loading and Processing ---
18
 
@@ -37,6 +38,10 @@ def get_df() -> pd.DataFrame:
37
  df["date"] = pd.to_datetime(df["date"], errors='coerce')
38
  df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
39
 
 
 
 
 
40
  # Prepare the DataFrame by removing 'abstract'
41
  paper_info = []
42
  for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
@@ -56,6 +61,18 @@ class Prettifier:
56
  """
57
  Converts raw DataFrame rows into a prettified format suitable for display.
58
  """
 
 
 
 
 
 
 
 
 
 
 
 
59
  @staticmethod
60
  def get_github_link(link: str) -> str:
61
  if not link:
@@ -70,20 +87,23 @@ class Prettifier:
70
  new_rows = []
71
  for _, row in df.iterrows():
72
  # Handle date_display as a clickable link
73
- date_display = Prettifier.create_link(row.date, f"https://huggingface.co/papers?date={row.date}")
74
 
75
  new_row = {
76
  "arxiv_id": row.get("arxiv_id", ""), # Include arxiv_id
77
  "date_display": date_display, # For display
78
  "date": row.get("date", datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")), # For internal calculations
79
- "paper_page": Prettifier.create_link(row.get("arxiv_id", ""), row.get("paper_page", "#")),
80
  "title": row.get("title", "No title"),
81
- "github": Prettifier.get_github_link(row.get("github", "")),
82
  "👍": row.get("upvotes", 0),
83
  "💬": row.get("num_comments", 0),
84
  }
85
  new_rows.append(new_row)
86
- return pd.DataFrame(new_rows)
 
 
 
87
 
88
 
89
  class PaperList:
@@ -205,9 +225,11 @@ class PaperManager:
205
 
206
  # Convert 'date' column to datetime
207
  df_sorted = df.copy()
208
- df_sorted['date_parsed'] = pd.to_datetime(df_sorted['date'], errors='coerce').dt.tz_localize(timezone.utc)
 
 
209
  df_sorted = df_sorted[df_sorted['date_parsed'] >= time_threshold]
210
- df_sorted = df_sorted.sort_values(by='upvotes', ascending=False).drop(columns=['date_parsed'])
211
  else:
212
  df_sorted = df
213
 
@@ -235,8 +257,9 @@ class PaperManager:
235
  """
236
  Sets the current search query and re-sorts the papers.
237
  """
238
- print(f"Setting search query to: {query}")
239
- self.current_search_query = query
 
240
  self.sort_papers()
241
  return True # Assume success
242
 
@@ -374,10 +397,7 @@ def change_sort_method_ui(method: str, time_frame: str = "all time") -> str:
374
  """
375
  Changes the sort method and, if 'top' is selected, sets the time frame.
376
  """
377
- if method.lower() == "top":
378
- paper_manager.set_sort_method(method.lower(), time_frame)
379
- else:
380
- paper_manager.set_sort_method(method.lower())
381
  return paper_manager.get_current_page_papers()
382
 
383
 
@@ -670,6 +690,13 @@ with demo:
670
  outputs=[paper_list]
671
  )
672
 
 
 
 
 
 
 
 
673
 
674
 
675
  # --- Launch the App ---
 
13
  import requests
14
 
15
  from datetime import timezone # Ensure timezone is imported
16
+ import html # For sanitizing user inputs
17
 
18
  # --- Data Loading and Processing ---
19
 
 
38
  df["date"] = pd.to_datetime(df["date"], errors='coerce')
39
  df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
40
 
41
+ # Ensure 'upvotes' and 'num_comments' are present and fill NaNs with 0
42
+ df['upvotes'] = df.get('upvotes', 0).fillna(0)
43
+ df['num_comments'] = df.get('num_comments', 0).fillna(0)
44
+
45
  # Prepare the DataFrame by removing 'abstract'
46
  paper_info = []
47
  for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
 
61
  """
62
  Converts raw DataFrame rows into a prettified format suitable for display.
63
  """
64
+ def __init__(self):
65
+ self.expected_columns = [
66
+ "arxiv_id",
67
+ "date_display",
68
+ "date",
69
+ "paper_page",
70
+ "title",
71
+ "github",
72
+ "👍",
73
+ "💬",
74
+ ]
75
+
76
  @staticmethod
77
  def get_github_link(link: str) -> str:
78
  if not link:
 
87
  new_rows = []
88
  for _, row in df.iterrows():
89
  # Handle date_display as a clickable link
90
+ date_display = self.create_link(row.date, f"https://huggingface.co/papers?date={row.date}")
91
 
92
  new_row = {
93
  "arxiv_id": row.get("arxiv_id", ""), # Include arxiv_id
94
  "date_display": date_display, # For display
95
  "date": row.get("date", datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")), # For internal calculations
96
+ "paper_page": self.create_link(row.get("arxiv_id", ""), row.get("paper_page", "#")),
97
  "title": row.get("title", "No title"),
98
+ "github": self.get_github_link(row.get("github", "")),
99
  "👍": row.get("upvotes", 0),
100
  "💬": row.get("num_comments", 0),
101
  }
102
  new_rows.append(new_row)
103
+ if not new_rows:
104
+ return pd.DataFrame(columns=self.expected_columns)
105
+ else:
106
+ return pd.DataFrame(new_rows)
107
 
108
 
109
  class PaperList:
 
225
 
226
  # Convert 'date' column to datetime
227
  df_sorted = df.copy()
228
+ df_sorted['date_parsed'] = pd.to_datetime(df_sorted['date'], errors='coerce').dt.tz_localize(timezone.utc, ambiguous='NaT', nonexistent='NaT')
229
+ # Handle NaT values by setting them to a very old date
230
+ df_sorted['date_parsed'] = df_sorted['date_parsed'].fillna(datetime.datetime.min.replace(tzinfo=timezone.utc))
231
  df_sorted = df_sorted[df_sorted['date_parsed'] >= time_threshold]
232
+ df_sorted = df_sorted.sort_values(by='👍', ascending=False).drop(columns=['date_parsed'])
233
  else:
234
  df_sorted = df
235
 
 
257
  """
258
  Sets the current search query and re-sorts the papers.
259
  """
260
+ sanitized_query = html.escape(query) # Sanitize user input
261
+ print(f"Setting search query to: {sanitized_query}")
262
+ self.current_search_query = sanitized_query
263
  self.sort_papers()
264
  return True # Assume success
265
 
 
397
  """
398
  Changes the sort method and, if 'top' is selected, sets the time frame.
399
  """
400
+ paper_manager.set_sort_method(method.lower(), time_frame if method.lower() == "top" else None)
 
 
 
401
  return paper_manager.get_current_page_papers()
402
 
403
 
 
690
  outputs=[paper_list]
691
  )
692
 
693
+ # Footer
694
+ gr.Markdown("""
695
+ Related useful Spaces:
696
+ - [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
697
+ - [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
698
+ - [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
699
+ """)
700
 
701
 
702
  # --- Launch the App ---