akhaliq HF staff committed on
Commit
45195c8
·
verified ·
1 Parent(s): ba4e64e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -29
app.py CHANGED
@@ -64,9 +64,13 @@ def get_df() -> pd.DataFrame:
64
  info = row.copy()
65
  if "abstract" in info:
66
  del info["abstract"]
67
- info["paper_page"] = f"https://huggingface.co/papers/{row.arxiv_id}"
68
  paper_info.append(info)
69
- return pd.DataFrame(paper_info)
 
 
 
 
 
70
 
71
 
72
  class Prettifier:
@@ -83,15 +87,6 @@ class Prettifier:
83
  def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
84
  new_rows = []
85
  for _, row in df.iterrows():
86
- # Handle authors: list of dicts or list of strings
87
- if "authors" in row and isinstance(row["authors"], list):
88
- authors = ', '.join([
89
- author.get('name', '') if isinstance(author, dict) else str(author)
90
- for author in row["authors"]
91
- ])
92
- else:
93
- authors = 'Unknown'
94
-
95
  # Handle published_at: original date
96
  published_at = row["date"] # Already formatted as "%Y-%m-%d"
97
 
@@ -104,7 +99,6 @@ class Prettifier:
104
  "published_at": published_at, # For internal calculations
105
  "paper_page": Prettifier.create_link(row.arxiv_id, row.paper_page),
106
  "title": row["title"],
107
- "authors": authors, # Include authors
108
  "github": Prettifier.get_github_link(row.get("github", "")),
109
  "πŸ‘": row["upvotes"],
110
  "πŸ’¬": row["num_comments"],
@@ -120,7 +114,6 @@ class PaperList:
120
  ["published_at", "str"], # For internal use
121
  ["paper_page", "markdown"],
122
  ["title", "str"],
123
- ["authors", "str"], # Added authors
124
  ["github", "markdown"],
125
  ["πŸ‘", "number"],
126
  ["πŸ’¬", "number"],
@@ -141,23 +134,18 @@ class PaperList:
141
 
142
  def search(
143
  self,
144
- start_date: datetime.datetime,
145
- end_date: datetime.datetime,
146
  title_search_query: str,
147
- max_num_to_retrieve: int,
148
  ) -> pd.DataFrame:
149
  df = self.df_raw.copy()
150
- df["date"] = pd.to_datetime(df["date"], errors='coerce')
151
 
152
- # Filter by date
153
- df = df[(df["date"] >= start_date) & (df["date"] <= end_date)]
154
- df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
155
-
156
- # Filter by title
157
  if title_search_query:
158
  df = df[df["title"].str.contains(title_search_query, case=False, na=False)]
159
-
160
- # Removed abstract_search_query filtering since ragatouille is no longer used
 
 
161
 
162
  # Prettify the DataFrame
163
  df_prettified = self._prettifier(df).loc[:, self.column_names]
@@ -171,6 +159,7 @@ class PaperManager:
171
  self.paper_list = paper_list
172
  self.papers_per_page = papers_per_page
173
  self.sort_method = "hot" # Default sort method
 
174
  self.sort_papers()
175
  # 'current_page' and 'total_pages' are set in 'sort_papers()'
176
 
@@ -180,7 +169,7 @@ class PaperManager:
180
  This mimics the "hotness" algorithm used by platforms like Hacker News.
181
  """
182
  upvotes = row.get('πŸ‘', 0)
183
- published_at_str = row.get('date', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")) # **FIX** Changed from 'published_at' to 'date'
184
  try:
185
  published_time = datetime.datetime.strptime(published_at_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
186
  except ValueError:
@@ -197,11 +186,15 @@ class PaperManager:
197
  def sort_papers(self):
198
  df = self.paper_list.df_raw.copy()
199
 
 
 
 
 
200
  if self.sort_method == "hot":
201
  df['score'] = df.apply(self.calculate_score, axis=1)
202
  df_sorted = df.sort_values(by='score', ascending=False).drop(columns=['score'])
203
  elif self.sort_method == "new":
204
- df_sorted = df.sort_values(by='date', ascending=False) # **FIX** Changed from 'published_at' to 'date'
205
  else:
206
  df_sorted = df
207
 
@@ -218,6 +211,12 @@ class PaperManager:
218
  self.sort_papers()
219
  return True # Assume success
220
 
 
 
 
 
 
 
221
  def get_current_page_papers(self) -> str:
222
  start = (self.current_page - 1) * self.papers_per_page
223
  end = start + self.papers_per_page
@@ -237,7 +236,6 @@ class PaperManager:
237
  title = row.get('title', 'No title')
238
  paper_id = row.get('arxiv_id', '')
239
  url = f"https://huggingface.co/papers/{paper_id}"
240
- authors = row.get('authors', 'Unknown')
241
  upvotes = row.get('πŸ‘', 0)
242
  comments = row.get('πŸ’¬', 0)
243
  published_time_str = row.get('published_at', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
@@ -260,7 +258,7 @@ class PaperManager:
260
  <td colspan="1"></td>
261
  <td class="subtext">
262
  <span class="score">{upvotes} upvotes</span><br>
263
- authors: {authors} | {time_ago} | <a href="#">{comments} comments</a>
264
  </td>
265
  </tr>
266
  <tr style="height:5px"></tr>
@@ -334,6 +332,11 @@ def refresh_papers_ui() -> str:
334
  return paper_manager.refresh()
335
 
336
 
 
 
 
 
 
337
  # --- CSS Styling ---
338
 
339
  css = """
@@ -511,7 +514,15 @@ with demo:
511
  </tr>
512
  </table>
513
  """)
514
- # Sort Options
 
 
 
 
 
 
 
 
515
  with gr.Row():
516
  sort_radio = gr.Radio(
517
  choices=["Hot", "New"],
@@ -556,6 +567,13 @@ with demo:
556
  outputs=[paper_list]
557
  )
558
 
 
 
 
 
 
 
 
559
  # Footer
560
  gr.Markdown("""
561
  Related useful Spaces:
 
64
  info = row.copy()
65
  if "abstract" in info:
66
  del info["abstract"]
 
67
  paper_info.append(info)
68
+ df_prepared = pd.DataFrame(paper_info)
69
+
70
+ # Add 'paper_page' links
71
+ df_prepared["paper_page"] = df_prepared["arxiv_id"].apply(lambda x: f"https://huggingface.co/papers/{x}")
72
+
73
+ return df_prepared
74
 
75
 
76
  class Prettifier:
 
87
  def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
88
  new_rows = []
89
  for _, row in df.iterrows():
 
 
 
 
 
 
 
 
 
90
  # Handle published_at: original date
91
  published_at = row["date"] # Already formatted as "%Y-%m-%d"
92
 
 
99
  "published_at": published_at, # For internal calculations
100
  "paper_page": Prettifier.create_link(row.arxiv_id, row.paper_page),
101
  "title": row["title"],
 
102
  "github": Prettifier.get_github_link(row.get("github", "")),
103
  "πŸ‘": row["upvotes"],
104
  "πŸ’¬": row["num_comments"],
 
114
  ["published_at", "str"], # For internal use
115
  ["paper_page", "markdown"],
116
  ["title", "str"],
 
117
  ["github", "markdown"],
118
  ["πŸ‘", "number"],
119
  ["πŸ’¬", "number"],
 
134
 
135
  def search(
136
  self,
 
 
137
  title_search_query: str,
138
+ max_num_to_retrieve: int = 1000, # Set a high default to include all if not specified
139
  ) -> pd.DataFrame:
140
  df = self.df_raw.copy()
 
141
 
142
+ # Filter by title if search query is provided
 
 
 
 
143
  if title_search_query:
144
  df = df[df["title"].str.contains(title_search_query, case=False, na=False)]
145
+
146
+ # Limit the number of papers to retrieve if max_num_to_retrieve is set
147
+ if max_num_to_retrieve:
148
+ df = df.head(max_num_to_retrieve)
149
 
150
  # Prettify the DataFrame
151
  df_prettified = self._prettifier(df).loc[:, self.column_names]
 
159
  self.paper_list = paper_list
160
  self.papers_per_page = papers_per_page
161
  self.sort_method = "hot" # Default sort method
162
+ self.current_search_query = "" # Initialize with no search query
163
  self.sort_papers()
164
  # 'current_page' and 'total_pages' are set in 'sort_papers()'
165
 
 
169
  This mimics the "hotness" algorithm used by platforms like Hacker News.
170
  """
171
  upvotes = row.get('πŸ‘', 0)
172
+ published_at_str = row.get('published_at', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")) # **FIX** Changed from 'date' to 'published_at'
173
  try:
174
  published_time = datetime.datetime.strptime(published_at_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
175
  except ValueError:
 
186
  def sort_papers(self):
187
  df = self.paper_list.df_raw.copy()
188
 
189
+ # Apply search filter if a search query exists
190
+ if self.current_search_query:
191
+ df = df[df["title"].str.contains(self.current_search_query, case=False, na=False)]
192
+
193
  if self.sort_method == "hot":
194
  df['score'] = df.apply(self.calculate_score, axis=1)
195
  df_sorted = df.sort_values(by='score', ascending=False).drop(columns=['score'])
196
  elif self.sort_method == "new":
197
+ df_sorted = df.sort_values(by='published_at', ascending=False) # **FIX** Changed from 'date' to 'published_at'
198
  else:
199
  df_sorted = df
200
 
 
211
  self.sort_papers()
212
  return True # Assume success
213
 
214
+ def set_search_query(self, query: str):
215
+ print(f"Setting search query to: {query}")
216
+ self.current_search_query = query
217
+ self.sort_papers()
218
+ return True # Assume success
219
+
220
  def get_current_page_papers(self) -> str:
221
  start = (self.current_page - 1) * self.papers_per_page
222
  end = start + self.papers_per_page
 
236
  title = row.get('title', 'No title')
237
  paper_id = row.get('arxiv_id', '')
238
  url = f"https://huggingface.co/papers/{paper_id}"
 
239
  upvotes = row.get('πŸ‘', 0)
240
  comments = row.get('πŸ’¬', 0)
241
  published_time_str = row.get('published_at', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
 
258
  <td colspan="1"></td>
259
  <td class="subtext">
260
  <span class="score">{upvotes} upvotes</span><br>
261
+ {time_ago} | <a href="#">{comments} comments</a>
262
  </td>
263
  </tr>
264
  <tr style="height:5px"></tr>
 
332
  return paper_manager.refresh()
333
 
334
 
335
+ def search_papers_ui(query: str) -> str:
336
+ paper_manager.set_search_query(query)
337
+ return paper_manager.get_current_page_papers()
338
+
339
+
340
  # --- CSS Styling ---
341
 
342
  css = """
 
514
  </tr>
515
  </table>
516
  """)
517
+ # Search Bar and Sort Options
518
+ with gr.Row():
519
+ search_box = gr.Textbox(
520
+ label="Search Papers by Title",
521
+ placeholder="Enter keywords to search...",
522
+ lines=1,
523
+ interactive=True
524
+ )
525
+ search_button = gr.Button("Search")
526
  with gr.Row():
527
  sort_radio = gr.Radio(
528
  choices=["Hot", "New"],
 
567
  outputs=[paper_list]
568
  )
569
 
570
+ # Search functionality
571
+ search_button.click(
572
+ fn=search_papers_ui,
573
+ inputs=[search_box],
574
+ outputs=[paper_list]
575
+ )
576
+
577
  # Footer
578
  gr.Markdown("""
579
  Related useful Spaces: