akhaliq HF staff committed on
Commit
86ea290
·
verified ·
1 Parent(s): a767c65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -404
app.py CHANGED
@@ -1,251 +1,183 @@
1
- #!/usr/bin/env python
2
-
3
- import datetime
4
- import pandas as pd
5
- import tqdm.auto
6
- from apscheduler.schedulers.background import BackgroundScheduler
7
- from huggingface_hub import HfApi
8
-
9
  import gradio as gr
10
- import datasets # Ensure the datasets library is imported
11
-
12
- from datetime import timezone
13
- import atexit # To gracefully shut down the scheduler
14
- import logging # For logging purposes
15
-
16
- # --- Logging Configuration ---
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
- # --- Data Loading and Processing ---
21
-
22
- api = HfApi()
23
-
24
- def get_df() -> pd.DataFrame:
25
- """
26
- Loads and merges the papers and stats datasets, preprocesses the data by removing unnecessary columns,
27
- and adds a 'paper_page' link for each paper.
28
- """
29
- try:
30
- # Load datasets
31
- logger.info("Loading 'daily-papers' dataset.")
32
- df_papers = datasets.load_dataset("hysts-bot-data/daily-papers", split="train").to_pandas()
33
- logger.info("Loading 'daily-papers-stats' dataset.")
34
- df_stats = datasets.load_dataset("hysts-bot-data/daily-papers-stats", split="train").to_pandas()
35
-
36
- # Merge datasets on 'arxiv_id'
37
- logger.info("Merging datasets on 'arxiv_id'.")
38
- df = pd.merge(left=df_papers, right=df_stats, on="arxiv_id", suffixes=('_papers', '_stats'))
39
-
40
- # Reverse the DataFrame to have the latest papers first
41
- df = df[::-1].reset_index(drop=True)
42
-
43
- # Ensure 'date' is in datetime format and handle missing dates
44
- logger.info("Processing 'date' column.")
45
- df["date"] = pd.to_datetime(df["date"], errors='coerce')
46
- df["date"] = df["date"].dt.strftime("%Y-%m-%d").fillna(datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
47
-
48
- # Prepare the DataFrame by removing 'abstract'
49
- logger.info("Removing 'abstract' column if present.")
50
- if 'abstract' in df.columns:
51
- df = df.drop(columns=['abstract'])
52
-
53
- # Add 'paper_page' links
54
- logger.info("Adding 'paper_page' links.")
55
- df["paper_page"] = df["arxiv_id"].apply(lambda x: f"https://huggingface.co/papers/{x}")
56
-
57
- # Verify that 'date' column exists
58
- if 'date' not in df.columns:
59
- logger.error("'date' column is missing from the DataFrame. Filling with current date.")
60
- df["date"] = datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")
61
-
62
- logger.info("DataFrame preparation complete.")
63
- return df
64
- except Exception as e:
65
- logger.error(f"Error in get_df: {e}")
66
- return pd.DataFrame() # Return empty DataFrame on error
67
-
68
-
69
- class Prettifier:
70
- """
71
- Converts raw DataFrame rows into a prettified format suitable for display.
72
- """
73
- REQUIRED_COLUMNS = ["arxiv_id", "date_display", "date", "paper_page", "title", "github", "👍", "💬"]
74
-
75
- @staticmethod
76
- def get_github_link(link: str) -> str:
77
- if not link:
78
- return ""
79
- return Prettifier.create_link("github", link)
80
-
81
- @staticmethod
82
- def create_link(text: str, url: str) -> str:
83
- return f'<a href="{url}" target="_blank">{text}</a>'
84
-
85
- def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
86
- new_rows = []
87
- for _, row in df.iterrows():
88
- # Handle date_display as a clickable link
89
- date_display = Prettifier.create_link(row.get("date", ""), f"https://huggingface.co/papers?date={row.get('date', '')}")
90
-
91
- new_row = {
92
- "arxiv_id": row.get("arxiv_id", ""), # Include arxiv_id
93
- "date_display": date_display, # For display
94
- "date": row.get("date", datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")), # For internal calculations
95
- "paper_page": Prettifier.create_link(row.get("arxiv_id", ""), row.get("paper_page", "#")),
96
- "title": row.get("title", "No title"),
97
- "github": Prettifier.get_github_link(row.get("github", "")),
98
- "👍": row.get("upvotes", 0),
99
- "💬": row.get("num_comments", 0),
100
- }
101
- new_rows.append(new_row)
102
-
103
- # If no rows, return empty DataFrame with required columns to prevent KeyError
104
- if not new_rows:
105
- return pd.DataFrame(columns=self.REQUIRED_COLUMNS)
106
-
107
- return pd.DataFrame(new_rows)
108
-
109
-
110
- class PaperList:
111
- """
112
- Manages the list of papers.
113
- """
114
- COLUMN_INFO = [
115
- ["arxiv_id", "str"], # Added arxiv_id
116
- ["date_display", "markdown"], # For display
117
- ["date", "str"], # For internal use
118
- ["paper_page", "markdown"],
119
- ["title", "str"],
120
- ["github", "markdown"],
121
- ["👍", "number"],
122
- ["💬", "number"],
123
- ]
124
-
125
- def __init__(self, df: pd.DataFrame):
126
- self.df_raw = df
127
- self._prettifier = Prettifier()
128
- self.df_prettified = self._prettifier(df).loc[:, self.column_names]
129
-
130
- @property
131
- def column_names(self):
132
- return [col[0] for col in self.COLUMN_INFO]
133
-
134
- @property
135
- def column_datatype(self):
136
- return [col[1] for col in self.COLUMN_INFO]
137
-
138
- def get_prettified_df(self) -> pd.DataFrame:
139
- """
140
- Returns the prettified DataFrame.
141
- """
142
- return self.df_prettified
143
 
144
-
145
- # --- Sorting and Pagination Management ---
146
 
147
  class PaperManager:
148
- """
149
- Manages sorting and pagination for the list of papers.
150
- """
151
- def __init__(self, paper_list: PaperList, papers_per_page=30):
152
- self.paper_list = paper_list
153
  self.papers_per_page = papers_per_page
154
- self.sort_method = "hot" # Default sort method
155
- self.sort_papers()
156
- # 'current_page' and 'total_pages' are set in 'sort_papers()'
 
 
157
 
158
- def calculate_score(self, row):
159
  """
160
  Calculate the score of a paper based on upvotes and age.
161
  This mimics the "hotness" algorithm used by platforms like Hacker News.
162
  """
163
- upvotes = row.get('upvotes', 0) # Corrected from '👍' to 'upvotes'
164
- date_str = row.get('date', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
165
  try:
166
- published_time = datetime.datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
167
  except ValueError:
168
  # If parsing fails, use current time to minimize the impact on sorting
169
- published_time = datetime.datetime.now(timezone.utc)
170
-
171
- time_diff = datetime.datetime.now(timezone.utc) - published_time
172
  time_diff_hours = time_diff.total_seconds() / 3600 # Convert time difference to hours
173
 
174
  # Avoid division by zero and apply the hotness formula
175
- score = upvotes / ((time_diff_hours + 2) ** 1.5) if (time_diff_hours + 2) > 0 else 0
176
  return score
177
 
178
- def sort_papers(self):
179
  """
180
- Sorts the papers based on the current sort method.
 
181
  """
182
- df = self.paper_list.df_raw.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
 
184
  if self.sort_method == "hot":
185
- if not df.empty:
186
- df = df.drop(columns=['score'], errors='ignore') # Remove existing 'score' column if present
187
- df['score'] = df.apply(self.calculate_score, axis=1)
188
- df_sorted = df.sort_values(by='score', ascending=False).drop(columns=['score'])
189
- else:
190
- df_sorted = df
191
  elif self.sort_method == "new":
192
- df_sorted = df.sort_values(by='date', ascending=False) # Sort by 'date'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  else:
194
- df_sorted = df
195
-
196
- self.paper_list.df_raw = df_sorted.reset_index(drop=True)
197
- self.paper_list.df_prettified = self.paper_list._prettifier(self.paper_list.df_raw).loc[:, self.paper_list.column_names]
198
- self.total_pages = max((len(self.paper_list.df_raw) + self.papers_per_page - 1) // self.papers_per_page, 1)
199
- self.current_page = 1
200
- logger.info(f"Papers sorted by {self.sort_method}. Total pages: {self.total_pages}")
201
-
202
- def set_sort_method(self, method, time_frame=None):
203
- """
204
- Sets the sort method ('hot', 'new') and re-sorts the papers.
205
- """
206
- if method not in ["hot", "new"]:
207
  method = "hot"
208
- logger.info(f"Setting sort method to: {method}")
209
  self.sort_method = method
210
  self.sort_papers()
 
211
  return True # Assume success
212
 
213
- def get_current_page_papers(self) -> str:
214
- """
215
- Retrieves the HTML string of the current page's papers.
216
- """
217
- start = (self.current_page - 1) * self.papers_per_page
218
- end = start + self.papers_per_page
219
- current_papers = self.paper_list.df_prettified.iloc[start:end]
220
-
221
- if current_papers.empty:
222
- return "<div class='no-papers'>No papers available for this page.</div>"
223
-
224
- papers_html = "".join([self.format_paper(row, idx + start + 1) for idx, row in current_papers.iterrows()])
225
- return f"""
226
- <table border="0" cellpadding="0" cellspacing="0" class="itemlist">
227
- {papers_html}
228
- </table>
229
- """
230
-
231
- def format_paper(self, row, rank):
232
- """
233
- Formats a single paper entry into HTML.
234
- """
235
- title = row.get('title', 'No title')
236
- paper_id = row.get('arxiv_id', '')
237
  url = f"https://huggingface.co/papers/{paper_id}"
238
- upvotes = row.get('👍', 0)
239
- comments = row.get('💬', 0)
240
- date_str = row.get('date', datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d"))
 
241
  try:
242
- published_time = datetime.datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
243
  except ValueError:
244
- published_time = datetime.datetime.now(timezone.utc)
245
- time_diff = datetime.datetime.now(timezone.utc) - published_time
246
  time_ago_days = time_diff.days
247
  time_ago = f"{time_ago_days} days ago" if time_ago_days > 0 else "today"
248
 
 
 
 
 
249
  return f"""
250
  <tr class="athing">
251
  <td align="right" valign="top" class="title"><span class="rank">{rank}.</span></td>
@@ -257,126 +189,64 @@ class PaperManager:
257
  <td colspan="1"></td>
258
  <td class="subtext">
259
  <span class="score">{upvotes} upvotes</span><br>
260
- {time_ago} | <a href="#">{comments} comments</a>
 
261
  </td>
262
  </tr>
263
  <tr style="height:5px"></tr>
264
  """
265
 
266
- def next_page(self) -> str:
267
- """
268
- Navigates to the next page if possible.
 
 
 
 
 
 
 
 
 
 
269
  """
 
 
270
  if self.current_page < self.total_pages:
271
  self.current_page += 1
272
- logger.info(f"Navigated to page {self.current_page}.")
273
- else:
274
- logger.info("Already on the last page.")
275
- return self.get_current_page_papers()
276
 
277
- def prev_page(self) -> str:
278
- """
279
- Navigates to the previous page if possible.
280
- """
281
  if self.current_page > 1:
282
  self.current_page -= 1
283
- logger.info(f"Navigated to page {self.current_page}.")
284
- else:
285
- logger.info("Already on the first page.")
286
- return self.get_current_page_papers()
287
 
288
- def refresh(self) -> str:
289
- """
290
- Refreshes the current list of papers.
291
- """
292
- logger.info("Refreshing papers.")
293
- self.sort_papers()
294
- return self.get_current_page_papers()
295
-
296
-
297
- # Initialize PaperList and PaperManager
298
- def initialize_paper_manager() -> str:
299
- """
300
- Initializes the PaperList and PaperManager with the current DataFrame.
301
- """
302
- df = get_df()
303
- if df.empty:
304
- logger.warning("Initialized with an empty DataFrame.")
305
- paper_list = PaperList(df)
306
- manager = PaperManager(paper_list)
307
- logger.info("PaperManager initialized.")
308
- return manager.get_current_page_papers() # Return HTML string instead of the manager object
309
-
310
-
311
- paper_manager = None # Initialize globally
312
-
313
- def setup_paper_manager():
314
- """
315
- Sets up the global PaperManager instance.
316
- """
317
- global paper_manager
318
- df = get_df()
319
- paper_list = PaperList(df)
320
- paper_manager = PaperManager(paper_list)
321
- logger.info("PaperManager setup complete.")
322
-
323
-
324
- # Initialize PaperManager at the start
325
- setup_paper_manager()
326
-
327
-
328
- def update_paper_manager() -> str:
329
- """
330
- Updates the global PaperManager with the latest DataFrame.
331
- """
332
- global paper_manager
333
- logger.info("Updating PaperManager with latest data.")
334
- df = get_df()
335
- if df.empty:
336
- logger.warning("DataFrame is empty. Skipping update.")
337
- return paper_manager.get_current_page_papers()
338
- paper_manager.paper_list = PaperList(df)
339
- paper_manager.sort_papers()
340
- logger.info("PaperManager updated successfully.")
341
- return paper_manager.get_current_page_papers()
342
-
343
-
344
- # Scheduler for updating paper list every hour
345
- scheduler_data = BackgroundScheduler()
346
- scheduler_data.add_job(
347
- func=update_paper_manager,
348
- trigger="cron",
349
- minute=0, # Every hour at minute 0
350
- timezone="UTC",
351
- misfire_grace_time=60,
352
- )
353
- scheduler_data.start()
354
- logger.info("BackgroundScheduler started.")
355
-
356
- # Ensure the scheduler shuts down gracefully on exit
357
- atexit.register(lambda: scheduler_data.shutdown())
358
- logger.info("Scheduler shutdown registered.")
359
-
360
-
361
- # --- Gradio Interface Functions ---
362
-
363
- def change_sort_method_ui(method: str) -> str:
364
- """
365
- Changes the sort method based on user selection.
366
- """
367
- logger.info(f"Changing sort method to: {method}")
368
- success = paper_manager.set_sort_method(method.lower())
369
- if success:
370
- return paper_manager.get_current_page_papers()
371
- else:
372
- return "<div class='no-papers'>Failed to change sort method.</div>"
373
 
 
 
 
 
 
374
 
375
- # --- CSS Styling ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
  css = """
378
- /* Hacker News-like CSS */
379
-
380
  body {
381
  background-color: white;
382
  font-family: Verdana, Geneva, sans-serif;
@@ -415,20 +285,6 @@ table {
415
  text-decoration: none;
416
  }
417
 
418
- .header-table .sort-buttons button {
419
- background: none;
420
- border: none;
421
- color: #0000ff;
422
- cursor: pointer;
423
- font-size: 14pt;
424
- text-decoration: underline;
425
- padding: 0 10px;
426
- }
427
-
428
- .header-table .sort-buttons button:hover {
429
- color: #551A8B;
430
- }
431
-
432
  .itemlist .athing {
433
  background-color: #f6f6ef;
434
  }
@@ -466,11 +322,6 @@ table {
466
  font-size: 12pt;
467
  }
468
 
469
- .sort-buttons button {
470
- font-size: 12pt;
471
- padding: 0 5px;
472
- }
473
-
474
  .storylink {
475
  font-size: 9pt;
476
  }
@@ -503,14 +354,6 @@ table {
503
  color: black;
504
  }
505
 
506
- .header-table .sort-buttons button {
507
- color: #add8e6;
508
- }
509
-
510
- .header-table .sort-buttons button:hover {
511
- color: #9370db;
512
- }
513
-
514
  .itemlist .athing {
515
  background-color: #1e1e1e;
516
  }
@@ -533,9 +376,6 @@ table {
533
  }
534
  """
535
 
536
-
537
- # --- Initialize Gradio Blocks ---
538
-
539
  demo = gr.Blocks(css=css)
540
 
541
  with demo:
@@ -548,72 +388,46 @@ with demo:
548
 
549
  Once your paper is submitted, it will automatically appear in this demo.
550
  """)
551
-
552
- # Hacker News-like Header with "Hot" and "New" sort options
553
  with gr.Row():
554
- # Left side: Site title
555
- gr.Markdown("""
556
- <table border="0" cellpadding="0" cellspacing="0" class="header-table">
557
- <tr>
558
- <td>
559
- <span class="pagetop">
560
- <b class="hnname"><a href="#">Daily Papers</a></b>
561
- </span>
562
- </td>
563
- <td align="right" class="sort-buttons">
564
- <!-- Removed custom HTML buttons -->
565
- </td>
566
- </tr>
567
- </table>
568
- """, show_label=False)
569
- # Right side: Gradio Buttons for "Hot" and "New"
570
- with gr.Column(elem_classes=["sort-buttons"]):
571
- hot_button = gr.Button("Hot", elem_id="hot_button")
572
- new_button = gr.Button("New", elem_id="new_button")
573
-
574
  # Paper list
575
  paper_list = gr.HTML()
576
-
577
  # Navigation Buttons
578
  with gr.Row():
579
  prev_button = gr.Button("Prev")
580
  next_button = gr.Button("Next")
581
-
582
  # Load papers on app start
583
- demo.load(
584
- fn=lambda: paper_manager.get_current_page_papers(),
585
- outputs=[paper_list]
586
- )
587
-
588
  # Button clicks for pagination
589
- prev_button.click(
590
- fn=lambda: paper_manager.prev_page(),
591
- inputs=[],
592
- outputs=[paper_list]
593
- )
594
- next_button.click(
595
- fn=lambda: paper_manager.next_page(),
596
- inputs=[],
597
- outputs=[paper_list]
598
- )
599
-
600
- # Gradio Buttons trigger sort methods directly
601
- hot_button.click(
602
- fn=lambda: change_sort_method_ui("hot"),
603
- inputs=[],
604
- outputs=[paper_list]
605
- )
606
- new_button.click(
607
- fn=lambda: change_sort_method_ui("new"),
608
- inputs=[],
609
  outputs=[paper_list]
610
  )
611
-
612
- # Footer - Removed as per request
613
- # Removed the footer markdown section
614
-
615
-
616
- # --- Launch the App ---
617
 
618
- if __name__ == "__main__":
619
- demo.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import requests
3
+ from datetime import datetime, timezone
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ API_URL = "https://huggingface.co/api/daily_papers"
7
+ REPOS_API_URL_TEMPLATE = "https://huggingface.co/api/arxiv/{arxiv_id}/repos"
8
 
9
  class PaperManager:
10
def __init__(self, papers_per_page=30):
    """Create an empty manager; nothing is fetched until fetch_papers() runs.

    papers_per_page: how many papers are rendered on one page.
    """
    # Data: the raw API payload and the currently sorted view of it.
    self.raw_papers = []  # To store fetched data
    self.papers = []
    self.sort_method = "hot"  # Default sort method

    # Pagination state.
    self.papers_per_page = papers_per_page
    self.total_pages = 1
    self.current_page = 1
17
 
18
def calculate_score(self, paper):
    """
    Calculate the score of a paper based on upvotes and age.

    This mimics the "hotness" algorithm used by platforms like Hacker News:
    ``upvotes / (age_in_hours + 2) ** 1.5``.

    paper: one entry of the fetched payload; reads ``paper['paper']['upvotes']``
        and the ISO-8601 string under ``paper['publishedAt']``.
    Returns a real, non-negative number (0 when the timestamp lies so far in
    the future that the formula's base would not be positive).
    """
    now = datetime.now(timezone.utc)

    # A present-but-null "paper" or "upvotes" value must not break the division.
    upvotes = (paper.get('paper') or {}).get('upvotes') or 0

    published_at_str = paper.get('publishedAt')
    try:
        published_time = datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        # Missing or unparsable timestamp: use current time to minimize the
        # impact on sorting.
        published_time = now
    if published_time.tzinfo is None:
        # A naive datetime cannot be subtracted from the aware "now";
        # interpret it as UTC instead of raising TypeError.
        published_time = published_time.replace(tzinfo=timezone.utc)

    time_diff = now - published_time
    time_diff_hours = time_diff.total_seconds() / 3600  # Convert time difference to hours

    # Avoid division by zero, and avoid raising a negative base to 1.5 (which
    # yields a complex number that cannot be ordered when sorting).
    base = time_diff_hours + 2
    if base <= 0:
        return 0.0
    return upvotes / (base ** 1.5)
37
 
38
def fetch_repos_counts(self, arxiv_id, timeout=10):
    """
    Fetch the repositories (models, datasets, Spaces) associated with a given arxiv_id.

    arxiv_id: identifier substituted into REPOS_API_URL_TEMPLATE.
    timeout: seconds to wait for the HTTP response; without it a stalled
        connection would block a worker thread indefinitely.
    Returns a dictionary with counts for each type
    (``{'models': int, 'datasets': int, 'spaces': int}``); all zeros on any failure.
    """
    counts = {'models': 0, 'datasets': 0, 'spaces': 0}
    try:
        response = requests.get(
            REPOS_API_URL_TEMPLATE.format(arxiv_id=arxiv_id),
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as e:
        print(f"Error fetching repos for arxiv_id {arxiv_id}: {e}")
        return counts
    except Exception as e:
        # Best-effort: one bad response must not abort the whole refresh.
        print(f"Unexpected error fetching repos for arxiv_id {arxiv_id}: {e}")
        return counts

    if isinstance(data, dict):
        # NOTE(review): the endpoint appears to answer with a mapping keyed by
        # 'models' / 'datasets' / 'spaces', each holding a list of repos —
        # confirm against the live API. Count list lengths in that case.
        for key in counts:
            entries = data.get(key)
            if isinstance(entries, list):
                counts[key] = len(entries)
        return counts

    # Flat list of repo objects, each tagged with a singular 'type'.
    plural_for_type = {'model': 'models', 'dataset': 'datasets', 'space': 'spaces'}
    for repo in data if isinstance(data, list) else []:
        if not isinstance(repo, dict):
            continue
        key = plural_for_type.get(str(repo.get('type', '')).lower())
        if key is not None:
            counts[key] += 1
    return counts
64
+
65
def fetch_papers(self):
    """
    Download the daily papers, attach repo counts to each one, and re-sort.

    On success ``self.raw_papers`` holds the payload (each entry gains
    'models', 'datasets' and 'spaces' counts), ``self.papers`` is re-sorted,
    pagination is reset to page 1, and True is returned. Returns False when
    the request fails or the payload is empty.
    """
    try:
        # Bound the wait so a stalled connection cannot hang the UI callback.
        response = requests.get(f"{API_URL}?limit=100", timeout=30)
        response.raise_for_status()
        data = response.json()

        if not data:
            print("No data received from API.")
            return False

        # Debug: Print keys of the first paper
        print("Keys in the first paper:", data[0].keys())

        self.raw_papers = data  # Store raw data

        # Fetch repos counts concurrently
        with ThreadPoolExecutor(max_workers=20) as executor:
            future_to_paper = {}
            for paper in self.raw_papers:
                details = paper.get('paper') or {}
                # format_paper builds the papers URL from paper['paper']['id'],
                # so fall back to 'id' when no explicit 'arxiv_id' is present;
                # otherwise every paper would silently get zero counts.
                arxiv_id = details.get('arxiv_id') or details.get('id') or ''
                if arxiv_id:
                    future = executor.submit(self.fetch_repos_counts, arxiv_id)
                    future_to_paper[future] = paper
                else:
                    # If no arxiv_id, set counts to zero
                    paper['models'] = 0
                    paper['datasets'] = 0
                    paper['spaces'] = 0

            for future in as_completed(future_to_paper):
                paper = future_to_paper[future]
                counts = future.result()
                paper['models'] = counts['models']
                paper['datasets'] = counts['datasets']
                paper['spaces'] = counts['spaces']

        self.sort_papers()
        # Ceiling division, with at least one (possibly empty) page.
        self.total_pages = max((len(self.papers) + self.papers_per_page - 1) // self.papers_per_page, 1)
        self.current_page = 1
        return True
    except requests.RequestException as e:
        print(f"Error fetching papers: {e}")
        return False
    except Exception as e:
        # Best-effort boundary: report and signal failure instead of crashing the app.
        print(f"Unexpected error: {e}")
        return False
111
 
112
def sort_papers(self):
    """
    Rebuild ``self.papers`` as ``self.raw_papers`` ordered by ``self.sort_method``.

    Supported methods: "hot" (calculate_score), "new" (publication timestamp),
    "most_models", "most_datasets", "most_spaces" (repo counts). Any other
    value falls back to "hot". Ordering is always descending and stable.
    """
    def published_key(paper):
        # calculate_score reads the timestamp from 'publishedAt'; sorting on
        # 'published_at' alone left every key empty and made "new" a no-op.
        # The snake_case key is still honoured as a fallback.
        return paper.get('publishedAt') or paper.get('published_at') or ''

    count_fields = {
        "most_models": 'models',
        "most_datasets": 'datasets',
        "most_spaces": 'spaces',
    }

    method = self.sort_method
    if method == "new":
        key_func = published_key
    elif method in count_fields:
        field = count_fields[method]

        def key_func(paper):
            return paper.get(field, 0)
    else:
        # "hot", and the default for any unknown sort method.
        key_func = self.calculate_score

    self.papers = sorted(self.raw_papers, key=key_func, reverse=True)
150
+
151
def set_sort_method(self, method):
    """Select a sort method, re-sort the papers and jump back to page 1.

    method: one of "hot", "new", "most_models", "most_datasets",
        "most_spaces"; anything else is replaced by "hot".
    Always returns True.
    """
    recognised = ("hot", "new", "most_models", "most_datasets", "most_spaces")
    chosen = method if method in recognised else "hot"

    print(f"Setting sort method to: {chosen}")
    self.sort_method = chosen
    self.sort_papers()
    self.current_page = 1
    return True  # Assume success
160
 
161
+ def format_paper(self, paper, rank):
162
+ title = paper.get('title', 'No title')
163
+ paper_id = paper.get('paper', {}).get('id', '')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  url = f"https://huggingface.co/papers/{paper_id}"
165
+ authors = ', '.join([author.get('name', '') for author in paper.get('paper', {}).get('authors', [])]) or 'Unknown'
166
+ upvotes = paper.get('paper', {}).get('upvotes', 0)
167
+ comments = paper.get('numComments', 0)
168
+ published_time_str = paper.get('published_at', datetime.now(timezone.utc).isoformat())
169
  try:
170
+ published_time = datetime.fromisoformat(published_time_str.replace('Z', '+00:00'))
171
  except ValueError:
172
+ published_time = datetime.now(timezone.utc)
173
+ time_diff = datetime.now(timezone.utc) - published_time
174
  time_ago_days = time_diff.days
175
  time_ago = f"{time_ago_days} days ago" if time_ago_days > 0 else "today"
176
 
177
+ models = paper.get('models', 0)
178
+ datasets = paper.get('datasets', 0)
179
+ spaces = paper.get('spaces', 0)
180
+
181
  return f"""
182
  <tr class="athing">
183
  <td align="right" valign="top" class="title"><span class="rank">{rank}.</span></td>
 
189
  <td colspan="1"></td>
190
  <td class="subtext">
191
  <span class="score">{upvotes} upvotes</span><br>
192
+ authors: {authors} | {time_ago} | <a href="#">{comments} comments</a><br>
193
+ Models: {models} | Datasets: {datasets} | Spaces: {spaces}
194
  </td>
195
  </tr>
196
  <tr style="height:5px"></tr>
197
  """
198
 
199
def render_papers(self):
    """Return the HTML for the current page of ``self.papers``.

    Each paper on the page is formatted by ``format_paper`` with its 1-based
    overall rank; a placeholder <div> is returned when the page has no papers.
    """
    first = (self.current_page - 1) * self.papers_per_page
    page_items = self.papers[first:first + self.papers_per_page]

    if len(page_items) == 0:
        return "<div class='no-papers'>No papers available for this page.</div>"

    rows = []
    for rank, paper in enumerate(page_items, start=first + 1):
        rows.append(self.format_paper(paper, rank))
    papers_html = "".join(rows)

    return f"""
        <table border="0" cellpadding="0" cellspacing="0" class="itemlist">
        {papers_html}
        </table>
        """
213
+
214
def next_page(self):
    """Move forward one page unless already on the last, then return the page HTML."""
    has_next = self.current_page < self.total_pages
    if has_next:
        self.current_page = self.current_page + 1
    return self.render_papers()
 
 
 
218
 
219
def prev_page(self):
    """Move back one page unless already on the first, then return the page HTML."""
    has_previous = self.current_page > 1
    if has_previous:
        self.current_page = self.current_page - 1
    return self.render_papers()
 
 
 
223
 
224
+ paper_manager = PaperManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
def initialize_app():
    """Fetch the papers and return the first page's HTML, or an error <div> if fetching fails."""
    if not paper_manager.fetch_papers():
        return "<div class='no-papers'>Failed to fetch papers. Please try again later.</div>"
    return paper_manager.render_papers()
231
 
232
def refresh_papers():
    """Re-fetch the papers and return the rendered page, or an error <div> if the refresh fails."""
    if not paper_manager.fetch_papers():
        return "<div class='no-papers'>Failed to refresh papers. Please try again later.</div>"
    return paper_manager.render_papers()
237
+
238
def change_sort_method(method):
    """Apply the sort option picked in the UI and return the re-rendered page.

    method: the display label (e.g. "Most Models"); it is lower-cased and its
        spaces become underscores before being handed to the paper manager.
    Returns the page HTML, or an error <div> if the manager reports failure.
    """
    method_lower = "_".join(method.lower().split(" "))
    print(f"Changing sort method to: {method_lower}")

    if not paper_manager.set_sort_method(method_lower):
        print("Failed to set sort method.")
        return "<div class='no-papers'>Failed to sort papers. Please try again later.</div>"

    print("Sort method set successfully.")
    return paper_manager.render_papers()
247
 
248
  css = """
249
+ /* Existing CSS remains unchanged */
 
250
  body {
251
  background-color: white;
252
  font-family: Verdana, Geneva, sans-serif;
 
285
  text-decoration: none;
286
  }
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  .itemlist .athing {
289
  background-color: #f6f6ef;
290
  }
 
322
  font-size: 12pt;
323
  }
324
 
 
 
 
 
 
325
  .storylink {
326
  font-size: 9pt;
327
  }
 
354
  color: black;
355
  }
356
 
 
 
 
 
 
 
 
 
357
  .itemlist .athing {
358
  background-color: #1e1e1e;
359
  }
 
376
  }
377
  """
378
 
 
 
 
379
  demo = gr.Blocks(css=css)
380
 
381
  with demo:
 
388
 
389
  Once your paper is submitted, it will automatically appear in this demo.
390
  """)
391
+ # Header without Refresh Button
 
392
  with gr.Row():
393
+ gr.HTML("""
394
+ <table border="0" cellpadding="0" cellspacing="0" class="header-table">
395
+ <tr>
396
+ <td>
397
+ <span class="pagetop">
398
+ <b class="hnname"><a href="#">Daily Papers</a></b>
399
+ </span>
400
+ </td>
401
+ </tr>
402
+ </table>
403
+ """)
404
+ # Sort Options
405
+ with gr.Row():
406
+ sort_radio = gr.Radio(
407
+ choices=["Hot", "New", "Most Models", "Most Datasets", "Most Spaces"],
408
+ value="Hot",
409
+ label="Sort By",
410
+ interactive=True
411
+ )
 
412
  # Paper list
413
  paper_list = gr.HTML()
 
414
  # Navigation Buttons
415
  with gr.Row():
416
  prev_button = gr.Button("Prev")
417
  next_button = gr.Button("Next")
418
+
419
  # Load papers on app start
420
+ demo.load(initialize_app, outputs=[paper_list])
421
+
 
 
 
422
  # Button clicks for pagination
423
+ prev_button.click(paper_manager.prev_page, outputs=[paper_list])
424
+ next_button.click(paper_manager.next_page, outputs=[paper_list])
425
+
426
+ # Sort option change
427
+ sort_radio.change(
428
+ fn=change_sort_method,
429
+ inputs=[sort_radio],
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  outputs=[paper_list]
431
  )
 
 
 
 
 
 
432
 
433
+ demo.launch()