Spaces:
Sleeping
Sleeping
from fasthtml.common import * | |
from datetime import datetime, timedelta | |
import requests | |
from datetime import datetime | |
import json | |
from markdown import markdown | |
from dotenv import load_dotenv | |
# --- Application setup -------------------------------------------------------

# Load settings from ./.env (override=True is passed through to load_dotenv)
# and report the value load_dotenv returned.
loaded = load_dotenv("./.env", override=True)
print("Loaded .env file:", loaded)

# Search endpoint base URL and key; make_query() sends the key as a Bearer token.
API_URL = os.getenv("API_URL")
API_KEY = os.getenv("MS_SEARCH_KEY")

# Read the stylesheet inside a context manager so the file handle is closed
# immediately instead of being left open until garbage collection.
with open("styles.css") as css_file:
    css_content = css_file.read()

# Hand the stylesheet to fast_app as a header element.
app, rt = fast_app(hdrs=(Style(css_content),))

# Default extension names handed to markdown() by Markdown().
md_exts = 'codehilite', 'smarty', 'extra', 'sane_lists'
def Markdown(s, exts=md_exts, **kw):
    """Convert Markdown source ``s`` to HTML and wrap the result in a ``Div``.

    Args:
        s: Markdown text to render.
        exts: Extension names passed to ``markdown()`` as ``extensions``.
        **kw: Extra keyword arguments forwarded to the ``Div``.
    """
    rendered_html = markdown(s, extensions=exts)
    # NotStr presumably marks the HTML as already-rendered so it is not
    # escaped again -- confirm against the fasthtml documentation.
    return Div(NotStr(rendered_html), **kw)
def date_range_inputs(start_date, end_date):
    """Build the start/end date inputs, pre-filled from two date-like objects.

    Both arguments must provide ``strftime``; their values are rendered as
    ``YYYY-MM-DD`` strings. The inputs are named ``start_date`` and
    ``end_date`` and wrapped in a ``Div`` with class ``date-range``.
    """
    date_format = "%Y-%m-%d"
    start_input = Input(
        type="date",
        name="start_date",
        value=start_date.strftime(date_format),
        title="Start date",
    )
    end_input = Input(
        type="date",
        name="end_date",
        value=end_date.strftime(date_format),
        title="End date",
    )
    return Div(start_input, end_input, cls="date-range")
def search_form(start_date, end_date):
    """Build the search form: a query box, the date range inputs and a button.

    The form carries htmx attributes that POST to ``/search`` on submit and
    target the ``#search-results`` element.
    """
    query_box = Input(type="text", name="query", placeholder="Enter search query")
    date_pickers = date_range_inputs(start_date, end_date)
    submit_button = Button("Search", type="submit")

    htmx_attrs = {
        "hx_post": "/search",
        "hx_target": "#search-results",
        "hx_trigger": "submit",
    }
    return Form(query_box, date_pickers, submit_button, **htmx_attrs)
def iso_to_unix_timestamp(iso_string):
    """Convert an ISO 8601 date/datetime string to whole Unix seconds.

    The string is parsed with ``datetime.fromisoformat``. A string without a
    UTC offset yields a naive datetime, which ``timestamp()`` interprets in
    the machine's local time zone. Fractional seconds are truncated by
    ``int()``.
    """
    return int(datetime.fromisoformat(iso_string).timestamp())
def unix_timestamp_to_nice_format(timestamp):
    """Format a Unix timestamp as a short date such as ``Jan 01, 2024``.

    The timestamp is converted with ``datetime.fromtimestamp`` and no ``tz``
    argument, so the date shown is in the machine's local time zone.
    """
    moment = datetime.fromtimestamp(timestamp)
    return f"{moment:%b %d, %Y}"
def make_query(query, start_date, end_date, page=1, limit=10):
    """Run one page of a comment search and return the decoded JSON response.

    Args:
        query: Free-text search string, sent as ``q``.
        start_date: ISO date string; inclusive lower bound on
            ``comment_updatedAt_timestamp``.
        end_date: ISO date string; exclusive upper bound on
            ``comment_updatedAt_timestamp``.
        page: 1-based page number, converted to an offset of
            ``(page - 1) * limit``.
        limit: Number of hits requested per page.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not respond within the timeout.
    """
    url = f"{API_URL}/indexes/comments/search"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    after_timestamp = iso_to_unix_timestamp(start_date)
    before_timestamp = iso_to_unix_timestamp(end_date)

    # Named `payload` so the `query` parameter is not shadowed by the request body.
    payload = {
        "q": query,
        "limit": limit,
        "offset": (page - 1) * limit,
        "filter": f"comment_updatedAt_timestamp >= {after_timestamp} AND comment_updatedAt_timestamp < {before_timestamp}",
        "attributesToCrop": ['comment_text'],
        "cropLength": 30,
        "attributesToHighlight": ["comment_text", "discussion_title"],
        "highlightPreTag": "<span class=\"highlight\">",
        "highlightPostTag": "</span>",
    }

    # Without a timeout, requests waits forever on an unresponsive server and
    # would stall the web handler that called us.
    response = requests.post(url, headers=headers, json=payload, timeout=10)
    # Fail here with a clear HTTP error rather than letting callers hit a
    # KeyError on an error payload that has no "hits" key.
    response.raise_for_status()
    return response.json()
def search_results(query, start_date, end_date, page=1):
    """Run the search and render the ``#search-results`` container.

    The container holds, in order: the results bar, one card per hit, and
    the pagination controls.
    """
    raw_results = make_query(query, start_date, end_date, page)

    results_bar = make_results_bar(raw_results)
    cards = Div(*map(make_card, raw_results["hits"]))
    pagination = make_pagination(
        query, start_date, end_date, page, raw_results["estimatedTotalHits"]
    )
    return Div(results_bar, cards, pagination, id="search-results")
def make_results_bar(results):
    """Render a summary bar from a search response.

    Reads ``processingTimeMs`` and ``estimatedTotalHits`` from ``results``
    and shows each on its own line inside a ``results-bar`` ``Div``.
    """
    timing_line = Div(f"Processing time: {results['processingTimeMs']}ms")
    hits_line = Div(f"Estimated total hits: {results['estimatedTotalHits']}")
    return Div(timing_line, hits_line, cls="results-bar")
def _escape_keeping_highlights(text):
    """HTML-escape ``text`` while keeping the search highlight tags as markup."""
    from html import escape  # local import: keeps this block self-contained

    # These must stay identical to the highlightPreTag / highlightPostTag
    # values that make_query() sends with the search request.
    pre_tag = "<span class=\"highlight\">"
    post_tag = "</span>"

    escaped = escape(str(text))
    # Escaping also neutralised the highlight tags; restore exactly those two
    # known-safe strings and nothing else.
    return escaped.replace(escape(pre_tag), pre_tag).replace(escape(post_tag), post_tag)


def make_card(result):
    """Render one search hit as a card.

    Reads the ``_formatted`` entry of ``result`` and shows the highlighted
    discussion title, the highlighted comment text, the comment's update
    date, and a link to the discussion on hf.co.

    The title and comment text are user-authored content and are inserted as
    raw HTML so the highlight spans render. They are therefore escaped first,
    with only the highlight tags restored, so markup typed into a comment
    cannot be injected into the page.
    """
    formatted = result["_formatted"]
    url = f"https://hf.co/{formatted['repo_id']}/discussions/{formatted['discussion_num']}"
    # int() accepts the timestamp whether it arrives as a number or a string.
    date = unix_timestamp_to_nice_format(int(formatted["comment_updatedAt_timestamp"]))

    title_html = _escape_keeping_highlights(formatted["discussion_title"])
    comment_html = _escape_keeping_highlights(formatted["comment_text"])

    return Div(
        Div(
            Strong(NotStr(title_html)),
            P(NotStr(comment_html), cls="comment-text"),
            Div(Span(date)),
            A(url, href=url, target="_blank"),
        ),
        cls="card-item",
    )
def make_pagination(query, start_date, end_date, current_page, total_hits, limit=10):
    """Render the Previous / "Page X of Y" / Next controls.

    Args:
        query, start_date, end_date: Not read here; each button picks up the
            current form values through ``hx_include``.
        current_page: 1-based page currently shown.
        total_hits: Total (estimated) number of hits for the search.
        limit: Hits per page, used to derive the page count.

    Returns:
        A ``Div`` with class ``pagination``. "Previous" appears only after
        page 1 and "Next" only before the last page.
    """
    # Ceiling division, clamped to 1 so an empty result set reads
    # "Page 1 of 1" rather than "Page 1 of 0".
    total_pages = max(1, -(-total_hits // limit))

    def page_button(label, page):
        # Both buttons differ only in label and target page.
        return Button(
            label,
            hx_post=f"/search?page={page}",
            hx_target="#search-results",
            hx_include="[name='query'], [name='start_date'], [name='end_date']",
        )

    children = []
    if current_page > 1:
        children.append(page_button("Previous", current_page - 1))
    children.append(Span(f"Page {current_page} of {total_pages}"))
    if current_page < total_pages:
        children.append(page_button("Next", current_page + 1))
    return Div(*children, cls="pagination")
# NOTE(review): no route registration was visible for this handler, so the app
# would serve nothing. It is registered on "/" here -- confirm the intended path.
@rt("/")
def get():
    """Render the landing page: the search form plus an empty results area.

    The form's date range defaults to the seven days ending now.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=7)
    return Titled(
        "HF Discussion Search",
        Div(
            search_form(start_date, end_date),
            # Placeholder that the form and pagination buttons target.
            Div(id="search-results"),
            cls="container",
        ),
    )
# Registered on "/search", the path the search form and the pagination buttons
# post to; without a registration those requests would have no handler.
@rt("/search")
def post(query: str, start_date: str, end_date: str, page: int = 1):
    """Handle a search submission and return the rendered results fragment.

    Args:
        query: Free-text search string from the form.
        start_date: Start of the date range as submitted by the form.
        end_date: End of the date range as submitted by the form.
        page: 1-based results page; defaults to the first page.
    """
    return search_results(query, start_date, end_date, page)
# Start the application. serve() is not defined in this file -- presumably it
# comes from the `fasthtml.common` star import; confirm before relying on it.
serve()