akhaliq's picture
akhaliq HF staff
Update app.py
03bdc59 verified
raw
history blame
12.3 kB
import html
import logging
from datetime import datetime, timezone

import gradio as gr
import requests
# Configure logging for debugging purposes
# Root logger emits INFO and above, each record prefixed with a timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Endpoint queried by PaperManager.fetch_papers to obtain the list of daily papers.
API_URL = "https://huggingface.co/api/daily_papers"
class PaperManager:
    """Fetch, cache, filter and render the daily papers list as HTML.

    The paper list is downloaded from ``API_URL`` at most once per UTC day
    and kept in ``self.cache`` sorted by upvotes (highest first).

    Args:
        papers_per_page: Number of papers rendered on one page.
        max_papers: Maximum number of papers requested from the API and kept.
        request_timeout: Seconds to wait for the API before giving up.
    """

    NO_PAPERS_HTML = "<div>No papers available for this page.</div>"

    def __init__(self, papers_per_page=10, max_papers=100, request_timeout=10):
        self.papers_per_page = papers_per_page
        self.max_papers = max_papers
        self.request_timeout = request_timeout
        self.cache = []
        self.last_fetch_date = None
        self.total_pages = 1

    @staticmethod
    def _paper_info(entry):
        """Return the nested ``paper`` dict of an API entry, or ``{}`` if missing or malformed."""
        info = entry.get('paper') if isinstance(entry, dict) else None
        return info if isinstance(info, dict) else {}

    @classmethod
    def _upvotes(cls, entry):
        """Return a numeric upvote count usable as a sort key (0 when absent or non-numeric)."""
        votes = cls._paper_info(entry).get('upvotes', 0)
        return votes if isinstance(votes, (int, float)) else 0

    @staticmethod
    def _days_since(published_at):
        """Return whole days elapsed since an ISO-8601 timestamp, or ``"Unknown"`` if unparsable."""
        if not isinstance(published_at, str):
            return "Unknown"
        try:
            # 'Z' is rewritten because older fromisoformat() versions reject it.
            published_time = datetime.fromisoformat(published_at.replace('Z', '+00:00'))
        except ValueError:
            return "Unknown"
        if published_time.tzinfo is None:
            # Aware and naive datetimes cannot be subtracted; treat naive values as UTC.
            published_time = published_time.replace(tzinfo=timezone.utc)
        # Clamp so a timestamp slightly in the future does not render as "-1 days ago".
        return max(0, (datetime.now(timezone.utc) - published_time).days)

    @staticmethod
    def _fetch_failed(error_message):
        """Log ``error_message`` and build the failure tuple returned by ``fetch_papers``."""
        logging.error(error_message)
        return False, [], error_message

    def fetch_papers(self):
        """
        Fetch up to ``max_papers`` papers from the API, sorted by upvotes descending.
        Caches the result and fetches only once per UTC day while the cache is non-empty.

        Returns:
            tuple: (success: bool, data: list, error_message: str)
        """
        today = datetime.now(timezone.utc).date()
        if self.last_fetch_date == today and self.cache:
            logging.info("Using cached papers.")
            return True, self.cache, ""

        try:
            # Fetch up to max_papers; adjust 'limit' as per API's capability.
            # Without a timeout the call can block forever and requests.Timeout never fires.
            response = requests.get(
                f"{API_URL}?page=1&limit={self.max_papers}",
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()
            if not isinstance(data, list):
                raise ValueError("API response is not a list of papers.")
        except requests.HTTPError as http_err:
            return self._fetch_failed(f"HTTP error occurred: {http_err}")
        except requests.ConnectionError as conn_err:
            return self._fetch_failed(f"Connection error occurred: {conn_err}")
        except requests.Timeout as timeout_err:
            return self._fetch_failed(f"Timeout error occurred: {timeout_err}")
        except requests.RequestException as req_err:
            return self._fetch_failed(f"An error occurred: {req_err}")
        except ValueError as val_err:
            return self._fetch_failed(f"Data format error: {val_err}")

        # Sort by upvotes descending, then keep at most max_papers entries.
        ranked = sorted(data, key=self._upvotes, reverse=True)
        self.cache = ranked[:self.max_papers]
        self.last_fetch_date = today
        # Never report fewer than one page, so the UI does not show "Page 1 of 0".
        self.total_pages = max(
            1, (len(self.cache) + self.papers_per_page - 1) // self.papers_per_page
        )
        logging.info(f"Fetched {len(self.cache)} papers. Total pages: {self.total_pages}")
        return True, self.cache, ""

    def format_paper(self, paper):
        """
        Format a single paper's information into HTML.

        All values taken from the API entry are HTML-escaped before being
        interpolated, so a title or author name cannot inject markup.
        """
        if not isinstance(paper, dict):
            paper = {}
        info = self._paper_info(paper)

        title = html.escape(str(paper.get('title') or 'No title'))
        paper_id = info.get('id', '')
        url = html.escape(f"https://huggingface.co/papers/{paper_id}") if paper_id else "#"

        author_entries = info.get('authors')
        if not isinstance(author_entries, list):
            author_entries = []
        authors = html.escape(', '.join(
            str(author.get('name') or 'Unknown') if isinstance(author, dict) else 'Unknown'
            for author in author_entries
        ))

        upvotes = html.escape(str(info.get('upvotes', 0)))
        comments = html.escape(str(paper.get('numComments', 0)))
        # A missing timestamp defaults to "now", i.e. it renders as 0 days ago.
        time_ago = self._days_since(
            paper.get('publishedAt', datetime.now(timezone.utc).isoformat())
        )

        return (
            "<div style='border-bottom: 1px solid #eee; padding: 10px 0;'>\n"
            f"<a href='{url}' target='_blank' style='color: #000; text-decoration: none; font-weight: bold;'>{title}</a>\n"
            "<div style='font-size: 0.8em; color: #666; margin-top: 5px;'>\n"
            f"{upvotes} upvotes | by {authors} | {time_ago} days ago | {comments} comments\n"
            "</div>\n"
            "</div>"
        )

    def render_papers(self, papers, page=1):
        """
        Render HTML for a list of papers based on the current page.

        Pages below 1 are treated as page 1; a page past the end yields the
        same "no papers" placeholder as an empty list.
        """
        if not papers:
            return self.NO_PAPERS_HTML
        start = (max(page, 1) - 1) * self.papers_per_page
        page_papers = papers[start:start + self.papers_per_page]
        if not page_papers:
            return self.NO_PAPERS_HTML
        return "".join(self.format_paper(paper) for paper in page_papers)

    def search_papers(self, query):
        """
        Filter cached papers by a case-insensitive substring match on the title.

        Returns the full cache when the query is empty or only whitespace.
        """
        needle = query.strip().lower() if isinstance(query, str) else ""
        if not needle:
            return self.cache
        return [
            paper for paper in self.cache
            if isinstance(paper, dict) and needle in str(paper.get('title') or '').lower()
        ]
# Stylesheet handed to gr.Blocks(css=...). The class selectors below
# (.container, .paper-list, .search-row, .title, .footer, .error-message)
# correspond to the elem_classes assigned to components in the layout.
css = """
html, body {
height: 100%;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
background-color: #f0f0f0;
}
.container {
font-family: Arial, sans-serif;
max-width: 800px;
width: 100%;
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 10px rgba(0,0,0,0.1);
}
.paper-list {
max-height: 400px;
overflow-y: auto;
border: 1px solid #eee;
border-radius: 5px;
padding: 10px;
margin-bottom: 10px;
}
.search-row {
display: flex;
gap: 10px;
margin-bottom: 20px;
}
.title {
text-align: center;
color: #333;
}
.footer {
display: flex;
justify-content: space-between;
align-items: center;
margin-top: 10px;
}
.error-message {
color: red;
margin-bottom: 10px;
text-align: center;
}
"""
# Instantiate the PaperManager
# One module-level instance (default page size and paper limit) is used by
# every callback in this module, so they all read the same cached paper list.
paper_manager = PaperManager()
def initialize():
    """
    Load the papers and build the initial UI values.

    Returns:
        tuple: (list HTML, current page, total pages, papers, page label,
        error text). On a failed fetch the list HTML carries the error
        banner, the paging values fall back to page 1 of 1 and the papers
        list is empty.
    """
    fetched_ok, papers, error_message = paper_manager.fetch_papers()

    # Failure path first: show the error and reset paging to a single page.
    if not fetched_ok:
        error_html = f"<div class='error-message'>Error fetching papers: {error_message}</div>"
        return error_html, 1, 1, [], "Page 1 of 1", error_message

    first_page = 1
    page_count = paper_manager.total_pages
    rendered = paper_manager.render_papers(papers, first_page)
    return rendered, first_page, page_count, papers, f"Page {first_page} of {page_count}", ""
def refresh_papers(current_page, query):
    """
    Re-run the fetch and redraw the list from page 1.

    The manager decides whether a network request is actually made. When
    ``query`` is non-empty the refreshed list is filtered by it; otherwise
    the manager's full cache is shown.

    Returns:
        tuple: (list HTML, current page, total pages, papers, page label,
        error text). On failure the current page is kept, the papers list
        is empty and the list HTML carries the error banner.
    """
    fetched_ok, _, error_message = paper_manager.fetch_papers()

    if not fetched_ok:
        error_html = f"<div class='error-message'>Error fetching papers: {error_message}</div>"
        page_count = paper_manager.total_pages
        return error_html, current_page, page_count, [], f"Page {current_page} of {page_count}", error_message

    # Reset to first page on refresh
    first_page = 1
    per_page = paper_manager.papers_per_page
    if query:
        shown = paper_manager.search_papers(query)
        page_count = (len(shown) + per_page - 1) // per_page if shown else 1
    else:
        shown = paper_manager.cache
        page_count = paper_manager.total_pages

    rendered = paper_manager.render_papers(shown, first_page)
    return rendered, first_page, page_count, shown, f"Page {first_page} of {page_count}", ""
def search_papers(query, papers):
    """
    Search for papers based on the query and update the display.

    Args:
        query: Text from the search box; empty means "show everything".
        papers: List currently held in the UI state. Accepted for
            compatibility with the event wiring but not used as the search
            base (see the comment in the body).

    Returns:
        tuple: (list HTML, current page, total pages, papers, page label,
        error HTML). The page is always reset to 1.
    """
    # Always search the manager's full cache. `papers` is whatever the UI state
    # holds, which after an earlier search is already a filtered subset; falling
    # back to it for an empty query meant clearing the box never restored the
    # complete list. The manager returns its whole cache for an empty query.
    filtered = paper_manager.search_papers(query)

    per_page = paper_manager.papers_per_page
    total_pages = (len(filtered) + per_page - 1) // per_page if filtered else 1
    page = 1
    page_papers = paper_manager.render_papers(filtered, page)
    page_info_text = f"Page {page} of {total_pages}"

    # Only blame the query when one was actually entered.
    if query and not filtered:
        error_html = "<div class='error-message'>No papers match your search query.</div>"
    else:
        error_html = ""
    return page_papers, page, total_pages, filtered, page_info_text, error_html
def change_page(direction, current_page, total_pages, papers, query):
    """
    Move one page forward or back and re-render the list.

    ``direction`` is ``"next"`` or ``"prev"``; any other value, or a move
    past the first/last page, leaves the page unchanged. ``query`` is
    accepted but not consulted here.

    Returns:
        tuple: (list HTML, new page, total pages, papers, page label, "").
    """
    target_page = current_page  # No change if limits are reached
    if direction == "next":
        if current_page < total_pages:
            target_page = current_page + 1
    elif direction == "prev":
        if current_page > 1:
            target_page = current_page - 1

    if papers:
        rendered = paper_manager.render_papers(papers, target_page)
    else:
        rendered = "<div>No papers available for this page.</div>"

    return rendered, target_page, total_pages, papers, f"Page {target_page} of {total_pages}", ""
def go_prev(current_page, total_pages, papers, query):
    """
    Callback for the 'Previous Page' button: step one page back.
    """
    return change_page(
        direction="prev",
        current_page=current_page,
        total_pages=total_pages,
        papers=papers,
        query=query,
    )
def go_next(current_page, total_pages, papers, query):
    """
    Callback for the 'Next Page' button: step one page forward.
    """
    return change_page(
        direction="next",
        current_page=current_page,
        total_pages=total_pages,
        papers=papers,
        query=query,
    )
# Build the Gradio Blocks interface: layout first, then event wiring.
demo = gr.Blocks(css=css)

with demo:
    with gr.Column(elem_classes=["container"]):
        # Page heading
        gr.Markdown("# Daily Papers - HackerNews Style", elem_classes=["title"])

        # Search box with the refresh button beside it
        with gr.Row(elem_classes=["search-row"]):
            search_input = gr.Textbox(label="Search papers", placeholder="Enter search term...")
            refresh_button = gr.Button("Refresh")

        # Area for error banners
        error_display = gr.HTML(elem_classes=["error-message"])

        # Scrollable list of rendered papers
        paper_list = gr.HTML(elem_classes=["paper-list"])

        # Pager: previous / "Page X of Y" / next
        with gr.Row(elem_classes=["footer"]):
            prev_button = gr.Button("Previous Page")
            page_info = gr.Markdown("Page 1 of 1")
            next_button = gr.Button("Next Page")

    # State values carried between callbacks
    current_page_state = gr.State(1)
    total_pages_state = gr.State(1)
    papers_state = gr.State([])

    # Every callback returns the same six values, in this order.
    view_outputs = [
        paper_list,
        current_page_state,
        total_pages_state,
        papers_state,
        page_info,
        error_display,
    ]
    search_inputs = [search_input, papers_state]
    pager_inputs = [current_page_state, total_pages_state, papers_state, search_input]

    # Populate the view when the page loads
    demo.load(initialize, outputs=view_outputs)

    # Search runs both on Enter and on every change of the textbox
    search_input.submit(search_papers, inputs=search_inputs, outputs=view_outputs)
    search_input.change(search_papers, inputs=search_inputs, outputs=view_outputs)

    # Refresh button
    refresh_button.click(
        refresh_papers,
        inputs=[current_page_state, search_input],
        outputs=view_outputs,
    )

    # Pagination buttons
    prev_button.click(go_prev, inputs=pager_inputs, outputs=view_outputs)
    next_button.click(go_next, inputs=pager_inputs, outputs=view_outputs)

# Launch the Gradio app
demo.launch()