Spaces:
Sleeping
Sleeping
File size: 3,839 Bytes
c3358e9 c8c0cec c3358e9 b715940 e3ff1d7 c8c0cec c3358e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import re
from urllib import request

import gradio as gr
from huggingface_hub import InferenceClient
from lxml import etree
# Site root for each venue key. It is prepended to the hrefs scraped from the
# venue's listing page to build absolute paper links.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    # ICML listing pages live on proceedings.mlr.press (see get_paper_home),
    # not on the NeurIPS site this entry used to point at.
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}
# (venue, year) -> PMLR volume id used to build the proceedings URL
# https://proceedings.mlr.press/<volume>.
mlr_mapping = {
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    # Was 'v139', a copy of the 2021 entry; ICML 2024 is PMLR volume 235.
    ('icml', 2024): 'v235',
}
def get_paper_home(venue, year):
    """Return the URL of the page that lists the papers of a venue/year.

    Args:
        venue: Lower-case venue key such as ``'acl'``, ``'emnlp'``,
            ``'naacl'``, ``'nips'``, ``'icml'`` or ``'iclr'``.
        year: Conference year. For ICML it is looked up in ``mlr_mapping``
            to find the proceedings volume.

    Returns:
        The listing-page URL as a string, or ``None`` when no listing page is
        known: the venue has no branch here (e.g. ``'tacl'``) or the ICML
        year is missing from ``mlr_mapping``.
    """
    if venue in ('acl', 'emnlp', 'naacl'):
        return f'https://aclanthology.org/events/{venue}-{year}'
    if venue == 'nips':
        return f'https://papers.{venue}.cc/paper_files/paper/{year}'
    if venue == 'icml':
        # An unmapped year yields None instead of a KeyError so that callers
        # can skip it the same way they skip an unknown venue.
        volume = mlr_mapping.get((venue, year))
        if volume is None:
            return None
        return f'https://proceedings.mlr.press/{volume}'
    if venue == 'iclr':
        return f'https://iclr.cc/Downloads/{year}'
    return None
def check_key_words(ele, keywords=None):
    """Return True if any keyword pattern matches the text of ``ele``.

    The element's text (including the text of its descendants) is joined and
    lower-cased, then searched case-insensitively with every pattern.

    Args:
        ele: Element exposing ``itertext()``, e.g. the ``<a>`` node of a paper.
        keywords: A single regex pattern given as a string, or an iterable of
            regex patterns. When omitted, a module-level ``keywords`` variable
            is used if one exists (the behaviour of the original one-argument
            call); otherwise nothing matches.

    Returns:
        ``True`` when at least one pattern matches, ``False`` otherwise.

    Raises:
        re.error: If a pattern is not a valid regular expression.
    """
    if keywords is None:
        keywords = globals().get('keywords') or ()
    if isinstance(keywords, str):
        # A bare string is one pattern; iterating it would test it
        # character by character.
        keywords = [keywords]
    text = ''.join(ele.itertext()).lower()
    # IGNORECASE lets upper-case keywords match the lower-cased title.
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in keywords)
def check_key_words_icml(ele, keywords=None):
    """Return True if any keyword pattern matches the title inside ``ele``.

    Only the text of the ``<p class="title">`` descendant is searched, so
    author names or link captions in the same paper block do not produce hits.

    Args:
        ele: Element of one paper entry; the title is looked up with
            ``ele.find('.//p[@class="title"]')``.
        keywords: A single regex pattern given as a string, or an iterable of
            regex patterns. When omitted, a module-level ``keywords`` variable
            is used if one exists (the behaviour of the original one-argument
            call); otherwise nothing matches.

    Returns:
        ``True`` when at least one pattern matches the title, ``False``
        otherwise, including when the entry has no title node.

    Raises:
        re.error: If a pattern is not a valid regular expression.
    """
    if keywords is None:
        keywords = globals().get('keywords') or ()
    if isinstance(keywords, str):
        # A bare string is one pattern; iterating it would test it
        # character by character.
        keywords = [keywords]
    title_node = ele.find('.//p[@class="title"]')
    if title_node is None:
        # Entry without a title: nothing to match, and no AttributeError.
        return False
    title = ''.join(title_node.itertext()).lower()
    # IGNORECASE lets upper-case keywords match the lower-cased title.
    return any(re.search(pattern, title, re.IGNORECASE) for pattern in keywords)
def search(keywords, venues):
search_venues = []
if "NeurIPS/ICLR/ICML" in venues:
search_venues.extend(['nips', 'iclr', 'icml'])
if "*ACL" in venues:
search_venues.extend(['acl', 'emnlp', 'naacl', 'tacl'])
if "CVPR/ECCV/ICCV" in venues:
search_venues.extend(['nips', 'iclr', 'icml'])
results = []
for venue in search_venues:
if 'acl' in venue:
paper_tag_on_html = ".//a[@class='align-middle']"
elif venue == 'iclr':
paper_tag_on_html = ".//a[@class='Poster']"
elif venue == 'nips':
paper_tag_on_html = ".//a[@title='paper title']"
elif venue == 'icml':
paper_tag_on_html = ".//div[@class='paper']"
for year in years:
print(venue, year)
paper_home = get_paper_home(venue, year)
url_prefix = url_prefix_mapping[venue]
if venue == 'icml':
url_prefix = paper_home
try:
response = request.urlopen(paper_home)
except:
continue
html = response.read().decode()
tree = etree.fromstring(html, etree.HTMLParser())
elements = tree.findall(paper_tag_on_html)
if venue == 'icml':
elements = [i for i in elements if check_key_words_icml(i)]
urls = [i.find('.//p[@class="links"]').find('a').get('href') for i in elements]
results.extend(urls)
else:
elements = [i for i in elements if check_key_words(i)]
urls = [url_prefix + i.find('.//p[@class="links"]').get('href') for i in elements]
results.extend(urls)
print(len(elements))
print()
return results
"""
This app is built with gr.Interface. For information on how to customize it, see the Gradio docs: https://www.gradio.app/docs/interface
"""
def _search_as_rows(keywords, venues):
    """Adapt ``search`` to the results table: one single-cell row per link.

    ``search`` returns a flat list of URLs, while the one-column
    ``gr.DataFrame`` output below is fed row-shaped data (a list of lists).
    """
    return [[link] for link in search(keywords, venues)]


# Web UI: a keyword box and a venue selector in, a one-column table of paper
# links out.
demo = gr.Interface(
    _search_as_rows,
    inputs=[
        gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."),
        gr.CheckboxGroup(
            ["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
            label="Choose Venues to Search",
            value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
        ),
    ],
    outputs=gr.DataFrame(headers=["Paper Link"]),  # "Title", "Authors"
)
# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()