akhaliq HF staff committed on
Commit
a575839
·
verified ·
1 Parent(s): 157015b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -36
app.py CHANGED
@@ -13,7 +13,6 @@ import datasets
13
  import requests
14
 
15
  from datetime import timezone # Ensure timezone is imported
16
- import html # For sanitizing user inputs
17
 
18
  # --- Data Loading and Processing ---
19
 
@@ -38,10 +37,6 @@ def get_df() -> pd.DataFrame:
38
  df["date"] = pd.to_datetime(df["date"], errors='coerce')
39
  df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
40
 
41
- # Ensure 'upvotes' and 'num_comments' are present and fill NaNs with 0
42
- df['upvotes'] = df.get('upvotes', 0).fillna(0)
43
- df['num_comments'] = df.get('num_comments', 0).fillna(0)
44
-
45
  # Prepare the DataFrame by removing 'abstract'
46
  paper_info = []
47
  for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
@@ -61,18 +56,8 @@ class Prettifier:
61
  """
62
  Converts raw DataFrame rows into a prettified format suitable for display.
63
  """
64
- def __init__(self):
65
- self.expected_columns = [
66
- "arxiv_id",
67
- "date_display",
68
- "date",
69
- "paper_page",
70
- "title",
71
- "github",
72
  "👍",
73
  "💬",
74
- ]
75
-
76
  @staticmethod
77
  def get_github_link(link: str) -> str:
78
  if not link:
@@ -87,23 +72,25 @@ class Prettifier:
87
  new_rows = []
88
  for _, row in df.iterrows():
89
  # Handle date_display as a clickable link
90
- date_display = self.create_link(row.date, f"https://huggingface.co/papers?date={row.date}")
91
 
92
  new_row = {
93
- "arxiv_id": row.get("arxiv_id", ""), # Include arxiv_id
94
- "date_display": date_display, # For display
95
  "date": row.get("date", datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")), # For internal calculations
96
- "paper_page": self.create_link(row.get("arxiv_id", ""), row.get("paper_page", "#")),
97
  "title": row.get("title", "No title"),
98
- "github": self.get_github_link(row.get("github", "")),
99
  "👍": row.get("upvotes", 0),
100
  "💬": row.get("num_comments", 0),
101
  }
102
  new_rows.append(new_row)
 
 
103
  if not new_rows:
104
- return pd.DataFrame(columns=self.expected_columns)
105
- else:
106
- return pd.DataFrame(new_rows)
107
 
108
 
109
  class PaperList:
@@ -128,11 +115,11 @@ class PaperList:
128
 
129
  @property
130
  def column_names(self):
131
- return list(map(operator.itemgetter(0), self.COLUMN_INFO))
132
 
133
  @property
134
  def column_datatype(self):
135
- return list(map(operator.itemgetter(1), self.COLUMN_INFO))
136
 
137
  def search(
138
  self,
@@ -225,11 +212,9 @@ class PaperManager:
225
 
226
  # Convert 'date' column to datetime
227
  df_sorted = df.copy()
228
- df_sorted['date_parsed'] = pd.to_datetime(df_sorted['date'], errors='coerce').dt.tz_localize(timezone.utc, ambiguous='NaT', nonexistent='NaT')
229
- # Handle NaT values by setting them to a very old date
230
- df_sorted['date_parsed'] = df_sorted['date_parsed'].fillna(datetime.datetime.min.replace(tzinfo=timezone.utc))
231
  df_sorted = df_sorted[df_sorted['date_parsed'] >= time_threshold]
232
- df_sorted = df_sorted.sort_values(by='👍', ascending=False).drop(columns=['date_parsed'])
233
  else:
234
  df_sorted = df
235
 
@@ -257,9 +242,8 @@ class PaperManager:
257
  """
258
  Sets the current search query and re-sorts the papers.
259
  """
260
- sanitized_query = html.escape(query) # Sanitize user input
261
- print(f"Setting search query to: {sanitized_query}")
262
- self.current_search_query = sanitized_query
263
  self.sort_papers()
264
  return True # Assume success
265
 
@@ -397,7 +381,10 @@ def change_sort_method_ui(method: str, time_frame: str = "all time") -> str:
397
  """
398
  Changes the sort method and, if 'top' is selected, sets the time frame.
399
  """
400
- paper_manager.set_sort_method(method.lower(), time_frame if method.lower() == "top" else None)
 
 
 
401
  return paper_manager.get_current_page_papers()
402
 
403
 
@@ -702,4 +689,4 @@ with demo:
702
  # --- Launch the App ---
703
 
704
  if __name__ == "__main__":
705
- demo.launch()
 
13
  import requests
14
 
15
  from datetime import timezone # Ensure timezone is imported
 
16
 
17
  # --- Data Loading and Processing ---
18
 
 
37
  df["date"] = pd.to_datetime(df["date"], errors='coerce')
38
  df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
39
 
 
 
 
 
40
  # Prepare the DataFrame by removing 'abstract'
41
  paper_info = []
42
  for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
 
56
  """
57
  Converts raw DataFrame rows into a prettified format suitable for display.
58
  """
59
+ REQUIRED_COLUMNS = ["arxiv_id", "date_display", "date", "paper_page", "title", "github", "👍", "💬"]
60
+
 
 
 
 
 
 
 
 
 
 
61
  @staticmethod
62
  def get_github_link(link: str) -> str:
63
  if not link:
 
72
  new_rows = []
73
  for _, row in df.iterrows():
74
  # Handle date_display as a clickable link
75
+ date_display = Prettifier.create_link(row.get("date", ""), f"https://huggingface.co/papers?date={row.get('date', '')}")
76
 
77
  new_row = {
78
+ "arxiv_id": row.get("arxiv_id", ""), # Include arxiv_id
79
+ "date_display": date_display, # For display
80
  "date": row.get("date", datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")), # For internal calculations
81
+ "paper_page": Prettifier.create_link(row.get("arxiv_id", ""), row.get("paper_page", "#")),
82
  "title": row.get("title", "No title"),
83
+ "github": Prettifier.get_github_link(row.get("github", "")),
84
  "👍": row.get("upvotes", 0),
85
  "💬": row.get("num_comments", 0),
86
  }
87
  new_rows.append(new_row)
88
+
89
+ # If no rows, return empty DataFrame with required columns to prevent KeyError
90
  if not new_rows:
91
+ return pd.DataFrame(columns=self.REQUIRED_COLUMNS)
92
+
93
+ return pd.DataFrame(new_rows)
94
 
95
 
96
  class PaperList:
 
115
 
116
  @property
117
  def column_names(self):
118
+ return [col[0] for col in self.COLUMN_INFO]
119
 
120
  @property
121
  def column_datatype(self):
122
+ return [col[1] for col in self.COLUMN_INFO]
123
 
124
  def search(
125
  self,
 
212
 
213
  # Convert 'date' column to datetime
214
  df_sorted = df.copy()
215
+ df_sorted['date_parsed'] = pd.to_datetime(df_sorted['date'], errors='coerce').dt.tz_localize(timezone.utc)
 
 
216
  df_sorted = df_sorted[df_sorted['date_parsed'] >= time_threshold]
217
+ df_sorted = df_sorted.sort_values(by='upvotes', ascending=False).drop(columns=['date_parsed'])
218
  else:
219
  df_sorted = df
220
 
 
242
  """
243
  Sets the current search query and re-sorts the papers.
244
  """
245
+ print(f"Setting search query to: {query}")
246
+ self.current_search_query = query
 
247
  self.sort_papers()
248
  return True # Assume success
249
 
 
381
  """
382
  Changes the sort method and, if 'top' is selected, sets the time frame.
383
  """
384
+ if method.lower() == "top":
385
+ paper_manager.set_sort_method(method.lower(), time_frame)
386
+ else:
387
+ paper_manager.set_sort_method(method.lower())
388
  return paper_manager.get_current_page_papers()
389
 
390
 
 
689
  # --- Launch the App ---
690
 
691
  if __name__ == "__main__":
692
+ demo.launch()