Update
- app.py +1 -1
- papers.py +12 -21
- requirements.txt +0 -1
app.py CHANGED
@@ -24,7 +24,7 @@ with gr.Blocks(css="style.css") as demo:
     with gr.Row():
         start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
         end_date = Calendar(label="End date", type="datetime")
-        search_title = gr.Textbox(label="Search
+        search_title = gr.Textbox(label="Search title")
 
     num_papers = gr.Textbox(label="Number of papers", value=update_num_papers(paper_list.df_raw), interactive=False)
     df = gr.Dataframe(
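For context, a minimal sketch of how the new "Search title" textbox could feed `PaperList.search` alongside the two Calendar inputs. The event wiring is not part of this diff, so the `.submit` hookup, the `PaperList` construction, and the bare `gr.Dataframe()` below are assumptions, not the Space's actual code.

import gradio as gr
from gradio_calendar import Calendar

from papers import PaperList, get_df

paper_list = PaperList(get_df())  # assumed constructor; the real one may differ

with gr.Blocks() as demo:
    with gr.Row():
        start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
        end_date = Calendar(label="End date", type="datetime")
        search_title = gr.Textbox(label="Search title")
    df = gr.Dataframe()

    # Re-run the search whenever a title query is submitted; the Calendar values
    # and the query string map onto the new search() signature in papers.py.
    search_title.submit(
        fn=paper_list.search,
        inputs=[start_date, end_date, search_title],
        outputs=df,
    )

demo.launch()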
papers.py
CHANGED
@@ -4,7 +4,6 @@ import operator
|
|
4 |
|
5 |
import datasets
|
6 |
import pandas as pd
|
7 |
-
import requests
|
8 |
import tqdm.auto
|
9 |
|
10 |
|
@@ -16,31 +15,19 @@ class PaperInfo:
|
|
16 |
title: str
|
17 |
paper_page: str
|
18 |
upvotes: int
|
19 |
-
published_at: str
|
20 |
-
|
21 |
-
def __post_init__(self):
|
22 |
-
object.__setattr__(self, "published_at", PaperInfo.convert_timestamp(self.published_at))
|
23 |
-
|
24 |
-
@staticmethod
|
25 |
-
def convert_timestamp(timestamp: str) -> str:
|
26 |
-
try:
|
27 |
-
return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y/%m/%d %H:%M:%S")
|
28 |
-
except ValueError:
|
29 |
-
return timestamp
|
30 |
|
31 |
|
32 |
def get_df() -> pd.DataFrame:
|
33 |
-
df =
|
34 |
-
|
|
|
|
|
|
|
35 |
paper_info = []
|
36 |
for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
|
37 |
-
res = requests.get(f"https://huggingface.co/api/papers/{row.arxiv_id}").json()
|
38 |
info = PaperInfo(
|
39 |
**row,
|
40 |
-
title=res["title"],
|
41 |
paper_page=f"https://huggingface.co/papers/{row.arxiv_id}",
|
42 |
-
upvotes=res["upvotes"],
|
43 |
-
published_at=res["publishedAt"],
|
44 |
)
|
45 |
paper_info.append(info)
|
46 |
return pd.DataFrame([dataclasses.asdict(info) for info in paper_info])
|
@@ -65,7 +52,6 @@ class Prettifier:
|
|
65 |
return f'<div class="{class_name}">{text}</div>'
|
66 |
|
67 |
def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
|
68 |
-
df = df.sort_values("arxiv_id", ascending=False).reset_index(drop=True)
|
69 |
new_rows = []
|
70 |
for _, row in df.iterrows():
|
71 |
new_row = dict(row) | {
|
@@ -99,7 +85,12 @@ class PaperList:
|
|
99 |
def column_datatype(self):
|
100 |
return list(map(operator.itemgetter(1), self.COLUMN_INFO))
|
101 |
|
102 |
-
def search(
|
|
|
|
|
|
|
|
|
|
|
103 |
df = self.df_raw.copy()
|
104 |
df["date"] = pd.to_datetime(df["date"])
|
105 |
|
@@ -108,7 +99,7 @@ class PaperList:
|
|
108 |
df["date"] = df["date"].dt.strftime("%Y-%m-%d")
|
109 |
|
110 |
# Filter by title
|
111 |
-
df = df[df["title"].str.contains(
|
112 |
|
113 |
df_prettified = self._prettifier(df).loc[:, self.column_names]
|
114 |
return df_prettified
|
|
|
4 |
|
5 |
import datasets
|
6 |
import pandas as pd
|
|
|
7 |
import tqdm.auto
|
8 |
|
9 |
|
|
|
15 |
title: str
|
16 |
paper_page: str
|
17 |
upvotes: int
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
def get_df() -> pd.DataFrame:
|
21 |
+
df = pd.merge(
|
22 |
+
left=datasets.load_dataset("hysts-bot-data/daily-papers")["train"].to_pandas(),
|
23 |
+
right=datasets.load_dataset("hysts-bot-data/daily-papers-upvotes")["train"].to_pandas(),
|
24 |
+
on="arxiv_id",
|
25 |
+
)
|
26 |
paper_info = []
|
27 |
for _, row in tqdm.auto.tqdm(df.iterrows(), total=len(df)):
|
|
|
28 |
info = PaperInfo(
|
29 |
**row,
|
|
|
30 |
paper_page=f"https://huggingface.co/papers/{row.arxiv_id}",
|
|
|
|
|
31 |
)
|
32 |
paper_info.append(info)
|
33 |
return pd.DataFrame([dataclasses.asdict(info) for info in paper_info])
|
|
|
52 |
return f'<div class="{class_name}">{text}</div>'
|
53 |
|
54 |
def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
|
55 |
new_rows = []
|
56 |
for _, row in df.iterrows():
|
57 |
new_row = dict(row) | {
|
|
|
85 |
def column_datatype(self):
|
86 |
return list(map(operator.itemgetter(1), self.COLUMN_INFO))
|
87 |
|
88 |
+
def search(
|
89 |
+
self,
|
90 |
+
start_date: datetime.datetime,
|
91 |
+
end_date: datetime.datetime,
|
92 |
+
title_search_query: str,
|
93 |
+
) -> pd.DataFrame:
|
94 |
df = self.df_raw.copy()
|
95 |
df["date"] = pd.to_datetime(df["date"])
|
96 |
|
|
|
99 |
df["date"] = df["date"].dt.strftime("%Y-%m-%d")
|
100 |
|
101 |
# Filter by title
|
102 |
+
df = df[df["title"].str.contains(title_search_query, case=False)]
|
103 |
|
104 |
df_prettified = self._prettifier(df).loc[:, self.column_names]
|
105 |
return df_prettified
|
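The core change in papers.py swaps the per-paper `requests.get` calls against the Hugging Face papers API for a single `pd.merge` of two pre-built datasets, which is also why `title`, `upvotes`, and `published_at` no longer need to be fetched and converted per row. A quick sketch to reproduce the new join outside the app and inspect its output; only the `arxiv_id` merge key is confirmed by the diff, the other column names are assumptions based on how the app uses the frame.

# Sketch: rebuild the new get_df() join to inspect the merged frame.
import datasets
import pandas as pd

papers = datasets.load_dataset("hysts-bot-data/daily-papers")["train"].to_pandas()
upvotes = datasets.load_dataset("hysts-bot-data/daily-papers-upvotes")["train"].to_pandas()

df = pd.merge(left=papers, right=upvotes, on="arxiv_id")

print(df.columns.tolist())                 # which fields the merged frame exposes
print(len(papers), len(upvotes), len(df))  # a default (inner) merge drops unmatched ids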
requirements.txt CHANGED
@@ -3,5 +3,4 @@ gradio==4.21.0
 gradio_calendar==0.0.4
 huggingface_hub==0.21.4
 pandas==2.2.0
-requests==2.31.0
 tqdm==4.66.1
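With the API calls gone, `requests` is unused and its pin can be dropped. A hypothetical pre-removal check, not part of this commit:

# Confirm no module in the Space still imports requests.
import pathlib

for path in pathlib.Path(".").rglob("*.py"):
    if "import requests" in path.read_text():
        print(f"{path} still imports requests")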