Update app.py
Browse files
app.py
CHANGED
@@ -41,11 +41,9 @@ class PaperManager:
|
|
41 |
Returns a dictionary with counts for each type.
|
42 |
"""
|
43 |
try:
|
44 |
-
print(f"Fetching repos for arxiv_id: {arxiv_id}") # Debug
|
45 |
response = requests.get(REPOS_API_URL_TEMPLATE.format(arxiv_id=arxiv_id))
|
46 |
response.raise_for_status()
|
47 |
data = response.json()
|
48 |
-
print(f"Repos data for arxiv_id {arxiv_id}: {data}") # Debug
|
49 |
|
50 |
counts = {'models': 0, 'datasets': 0, 'spaces': 0}
|
51 |
for repo in data:
|
@@ -56,7 +54,6 @@ class PaperManager:
|
|
56 |
counts['datasets'] += 1
|
57 |
elif repo_type == 'space':
|
58 |
counts['spaces'] += 1
|
59 |
-
print(f"Counts for arxiv_id {arxiv_id}: {counts}") # Debug
|
60 |
return counts
|
61 |
except requests.RequestException as e:
|
62 |
print(f"Error fetching repos for arxiv_id {arxiv_id}: {e}")
|
@@ -75,30 +72,14 @@ class PaperManager:
|
|
75 |
print("No data received from API.")
|
76 |
return False
|
77 |
|
78 |
-
# Debug: Print keys of the first paper
|
79 |
-
print("Keys in the first paper:", data[0].keys())
|
80 |
-
|
81 |
self.raw_papers = data # Store raw data
|
82 |
|
83 |
# Fetch repos counts concurrently
|
84 |
with ThreadPoolExecutor(max_workers=20) as executor:
|
85 |
-
future_to_paper = {}
|
86 |
-
for paper in self.raw_papers:
|
87 |
-
arxiv_id = paper.get('paper', {}).get('arxiv_id', '')
|
88 |
-
print(f"Processing paper ID: {paper.get('paper', {}).get('id', '')}, arxiv_id: {arxiv_id}") # Debug
|
89 |
-
if arxiv_id:
|
90 |
-
future = executor.submit(self.fetch_repos_counts, arxiv_id)
|
91 |
-
future_to_paper[future] = paper
|
92 |
-
else:
|
93 |
-
# If no arxiv_id, set counts to zero
|
94 |
-
paper['models'] = 0
|
95 |
-
paper['datasets'] = 0
|
96 |
-
paper['spaces'] = 0
|
97 |
-
|
98 |
for future in as_completed(future_to_paper):
|
99 |
paper = future_to_paper[future]
|
100 |
counts = future.result()
|
101 |
-
print(f"Paper ID: {paper.get('paper', {}).get('id', '')}, Counts: {counts}") # Debug
|
102 |
paper['models'] = counts['models']
|
103 |
paper['datasets'] = counts['datasets']
|
104 |
paper['spaces'] = counts['spaces']
|
@@ -124,7 +105,7 @@ class PaperManager:
|
|
124 |
elif self.sort_method == "new":
|
125 |
self.papers = sorted(
|
126 |
self.raw_papers,
|
127 |
-
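key=lambda x: x.get('… [removed line truncated in the extracted diff; the rest of the original expression is unavailable]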
|
128 |
reverse=True
|
129 |
)
|
130 |
elif self.sort_method == "most_models":
|
@@ -171,7 +152,7 @@ class PaperManager:
|
|
171 |
authors = ', '.join([author.get('name', '') for author in paper.get('paper', {}).get('authors', [])]) or 'Unknown'
|
172 |
upvotes = paper.get('paper', {}).get('upvotes', 0)
|
173 |
comments = paper.get('numComments', 0)
|
174 |
-
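published_time_str = paper.get('… [removed line truncated in the extracted diff; the rest of the original expression is unavailable]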
|
175 |
try:
|
176 |
published_time = datetime.fromisoformat(published_time_str.replace('Z', '+00:00'))
|
177 |
except ValueError:
|
@@ -184,8 +165,6 @@ class PaperManager:
|
|
184 |
datasets = paper.get('datasets', 0)
|
185 |
spaces = paper.get('spaces', 0)
|
186 |
|
187 |
-
print(f"Formatting paper {rank}: Models={models}, Datasets={datasets}, Spaces={spaces}") # Debug
|
188 |
-
|
189 |
return f"""
|
190 |
<tr class="athing">
|
191 |
<td align="right" valign="top" class="title"><span class="rank">{rank}.</span></td>
|
@@ -254,7 +233,6 @@ def change_sort_method(method):
|
|
254 |
return "<div class='no-papers'>Failed to sort papers. Please try again later.</div>"
|
255 |
|
256 |
css = """
|
257 |
-
/* Existing CSS remains unchanged */
|
258 |
body {
|
259 |
background-color: white;
|
260 |
font-family: Verdana, Geneva, sans-serif;
|
|
|
41 |
Returns a dictionary with counts for each type.
|
42 |
"""
|
43 |
try:
|
|
|
44 |
response = requests.get(REPOS_API_URL_TEMPLATE.format(arxiv_id=arxiv_id))
|
45 |
response.raise_for_status()
|
46 |
data = response.json()
|
|
|
47 |
|
48 |
counts = {'models': 0, 'datasets': 0, 'spaces': 0}
|
49 |
for repo in data:
|
|
|
54 |
counts['datasets'] += 1
|
55 |
elif repo_type == 'space':
|
56 |
counts['spaces'] += 1
|
|
|
57 |
return counts
|
58 |
except requests.RequestException as e:
|
59 |
print(f"Error fetching repos for arxiv_id {arxiv_id}: {e}")
|
|
|
72 |
print("No data received from API.")
|
73 |
return False
|
74 |
|
|
|
|
|
|
|
75 |
self.raw_papers = data # Store raw data
|
76 |
|
77 |
# Fetch repos counts concurrently
|
78 |
with ThreadPoolExecutor(max_workers=20) as executor:
|
79 |
+
future_to_paper = {executor.submit(self.fetch_repos_counts, paper.get('paper', {}).get('arxiv_id', '')): paper for paper in self.raw_papers}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
for future in as_completed(future_to_paper):
|
81 |
paper = future_to_paper[future]
|
82 |
counts = future.result()
|
|
|
83 |
paper['models'] = counts['models']
|
84 |
paper['datasets'] = counts['datasets']
|
85 |
paper['spaces'] = counts['spaces']
|
|
|
105 |
elif self.sort_method == "new":
|
106 |
self.papers = sorted(
|
107 |
self.raw_papers,
|
108 |
+
key=lambda x: x.get('publishedAt', ''),
|
109 |
reverse=True
|
110 |
)
|
111 |
elif self.sort_method == "most_models":
|
|
|
152 |
authors = ', '.join([author.get('name', '') for author in paper.get('paper', {}).get('authors', [])]) or 'Unknown'
|
153 |
upvotes = paper.get('paper', {}).get('upvotes', 0)
|
154 |
comments = paper.get('numComments', 0)
|
155 |
+
published_time_str = paper.get('publishedAt', datetime.now(timezone.utc).isoformat())
|
156 |
try:
|
157 |
published_time = datetime.fromisoformat(published_time_str.replace('Z', '+00:00'))
|
158 |
except ValueError:
|
|
|
165 |
datasets = paper.get('datasets', 0)
|
166 |
spaces = paper.get('spaces', 0)
|
167 |
|
|
|
|
|
168 |
return f"""
|
169 |
<tr class="athing">
|
170 |
<td align="right" valign="top" class="title"><span class="rank">{rank}.</span></td>
|
|
|
233 |
return "<div class='no-papers'>Failed to sort papers. Please try again later.</div>"
|
234 |
|
235 |
css = """
|
|
|
236 |
body {
|
237 |
background-color: white;
|
238 |
font-family: Verdana, Geneva, sans-serif;
|