Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -74,48 +74,44 @@ def search(keywords, venues, min_year, max_year):
|
|
| 74 |
|
| 75 |
results = []
|
| 76 |
for venue in search_venues:
|
| 77 |
-
res = []
|
| 78 |
-
if 'acl' in venue:
|
| 79 |
-
paper_tag_on_html = ".//a[@class='align-middle']"
|
| 80 |
-
elif venue == 'iclr':
|
| 81 |
-
paper_tag_on_html = ".//a[@class='Poster']"
|
| 82 |
-
elif venue == 'nips':
|
| 83 |
-
paper_tag_on_html = ".//a[@title='paper title']"
|
| 84 |
-
elif venue == 'icml':
|
| 85 |
-
paper_tag_on_html = ".//div[@class='paper']"
|
| 86 |
-
|
| 87 |
for year in year_range:
|
| 88 |
print(venue, year)
|
| 89 |
-
|
| 90 |
paper_home = get_paper_home(venue, year)
|
| 91 |
url_prefix = url_prefix_mapping[venue]
|
| 92 |
if venue == 'icml':
|
| 93 |
url_prefix = paper_home
|
| 94 |
-
|
| 95 |
try:
|
| 96 |
response = request.urlopen(paper_home)
|
| 97 |
except:
|
| 98 |
continue
|
| 99 |
-
|
| 100 |
html = response.read().decode()
|
| 101 |
tree = etree.fromstring(html, etree.HTMLParser())
|
| 102 |
-
|
| 103 |
-
elements = tree.findall(paper_tag_on_html)
|
| 104 |
-
if venue == 'icml':
|
| 105 |
-
elements = [i for i in elements if check_keywords_icml(i, keywords)]
|
| 106 |
-
urls = [i.find('.//p[@class="links"]').find('a').get('href') for i in elements]
|
| 107 |
-
res.extend(urls)
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
print(len(elements))
|
| 115 |
print()
|
| 116 |
|
| 117 |
-
results.append(res)
|
| 118 |
-
|
| 119 |
return results
|
| 120 |
|
| 121 |
|
|
@@ -133,7 +129,9 @@ current_year = datetime.datetime.now().year
|
|
| 133 |
# ],
|
| 134 |
# outputs=gr.DataFrame(headers=["Paper Link", "Title", "Authors"])
|
| 135 |
# )
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
with gr.Blocks() as demo:
|
| 138 |
with gr.Row(): # Organize inputs and outputs in a row (side by side)
|
| 139 |
with gr.Column(scale=1): # Input section (narrower)
|
|
@@ -165,7 +163,7 @@ with gr.Blocks() as demo:
|
|
| 165 |
|
| 166 |
# Link the input components to the output function
|
| 167 |
submit_button.click(
|
| 168 |
-
|
| 169 |
inputs=[textbox, checkbox, min_year_slider, max_year_slider],
|
| 170 |
outputs=output_table
|
| 171 |
)
|
|
|
|
| 74 |
|
| 75 |
results = []
|
| 76 |
for venue in search_venues:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
for year in year_range:
|
| 78 |
print(venue, year)
|
|
|
|
| 79 |
paper_home = get_paper_home(venue, year)
|
| 80 |
url_prefix = url_prefix_mapping[venue]
|
| 81 |
if venue == 'icml':
|
| 82 |
url_prefix = paper_home
|
| 83 |
+
|
| 84 |
try:
|
| 85 |
response = request.urlopen(paper_home)
|
| 86 |
except:
|
| 87 |
continue
|
| 88 |
+
|
| 89 |
html = response.read().decode()
|
| 90 |
tree = etree.fromstring(html, etree.HTMLParser())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
if 'acl' in venue:
|
| 93 |
+
paper_tag_on_html = ".//a[@class='align-middle']"
|
| 94 |
+
elif venue == 'iclr':
|
| 95 |
+
paper_tag_on_html = ".//a[@class='Poster']"
|
| 96 |
+
elif venue == 'nips':
|
| 97 |
+
paper_tag_on_html = ".//a[@title='paper title']"
|
| 98 |
+
elif venue == 'icml':
|
| 99 |
+
paper_tag_on_html = ".//div[@class='paper']"
|
| 100 |
+
|
| 101 |
+
elements = tree.findall(paper_tag_on_html)
|
| 102 |
+
for element in elements:
|
| 103 |
+
if venue == 'icml':
|
| 104 |
+
if check_keywords_icml(element, keywords):
|
| 105 |
+
paper_url = element.find('.//p[@class="links"]').find('a').get('href')
|
| 106 |
+
results.append([paper_url])
|
| 107 |
+
else:
|
| 108 |
+
if check_keywords(element, keywords):
|
| 109 |
+
paper_url = url_prefix + element.get('href')
|
| 110 |
+
results.append([paper_url])
|
| 111 |
+
|
| 112 |
print(len(elements))
|
| 113 |
print()
|
| 114 |
|
|
|
|
|
|
|
| 115 |
return results
|
| 116 |
|
| 117 |
|
|
|
|
| 129 |
# ],
|
| 130 |
# outputs=gr.DataFrame(headers=["Paper Link", "Title", "Authors"])
|
| 131 |
# )
|
| 132 |
+
def test_search(keywords, venues, min_year, max_year):
|
| 133 |
+
return [["https://example.com"], ["https://anotherexample.com"]]
|
| 134 |
+
|
| 135 |
with gr.Blocks() as demo:
|
| 136 |
with gr.Row(): # Organize inputs and outputs in a row (side by side)
|
| 137 |
with gr.Column(scale=1): # Input section (narrower)
|
|
|
|
| 163 |
|
| 164 |
# Link the input components to the output function
|
| 165 |
submit_button.click(
|
| 166 |
+
test_search,
|
| 167 |
inputs=[textbox, checkbox, min_year_slider, max_year_slider],
|
| 168 |
outputs=output_table
|
| 169 |
)
|