Spaces:
Runtime error
Runtime error
update
Browse files
- lrt/__init__.py +2 -1
- lrt/utils/article.py +19 -1
- widgets/body.py +27 -19
lrt/__init__.py
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
from .lrt import LiteratureResearchTool
|
| 2 |
-
from .clustering import Configuration
|
|
|
|
|
|
| 1 |
from .lrt import LiteratureResearchTool
|
| 2 |
+
from .clustering import Configuration
|
| 3 |
+
from .utils import Article, ArticleList
|
lrt/utils/article.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from typing import List, Union, Optional
|
|
|
|
| 2 |
class Article:
|
| 3 |
'''
|
| 4 |
attributes:
|
|
@@ -31,6 +32,15 @@ class Article:
|
|
| 31 |
|
| 32 |
return ret
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
class ArticleList:
|
| 35 |
'''
|
| 36 |
list of articles
|
|
@@ -72,6 +82,12 @@ class ArticleList:
|
|
| 72 |
def __len__(self):
|
| 73 |
return len(self.__list__)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
@classmethod
|
| 76 |
def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
|
| 77 |
if isinstance(items,dict):
|
|
@@ -391,4 +407,6 @@ if __name__ == '__main__':
|
|
| 391 |
print(pwc_articles)
|
| 392 |
|
| 393 |
for i in ieee_articles:
|
| 394 |
-
print(i)
|
|
|
|
|
|
|
|
|
| 1 |
from typing import List, Union, Optional
|
| 2 |
+
import pandas as pd
|
| 3 |
class Article:
|
| 4 |
'''
|
| 5 |
attributes:
|
|
|
|
| 32 |
|
| 33 |
return ret
|
| 34 |
|
| 35 |
+
def getDict(self) -> dict:
|
| 36 |
+
return {
|
| 37 |
+
'title': self.title,
|
| 38 |
+
'authors': self.authors,
|
| 39 |
+
'abstract': self.abstract,
|
| 40 |
+
'url': self.url,
|
| 41 |
+
'publication_year': self.publication_year
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
class ArticleList:
|
| 45 |
'''
|
| 46 |
list of articles
|
|
|
|
| 82 |
def __len__(self):
|
| 83 |
return len(self.__list__)
|
| 84 |
|
| 85 |
+
def getDataFrame(self) ->pd.DataFrame:
|
| 86 |
+
return pd.DataFrame(
|
| 87 |
+
[x.getDict() for x in self.__list__]
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
@classmethod
|
| 92 |
def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
|
| 93 |
if isinstance(items,dict):
|
|
|
|
| 407 |
print(pwc_articles)
|
| 408 |
|
| 409 |
for i in ieee_articles:
|
| 410 |
+
print(i)
|
| 411 |
+
|
| 412 |
+
print(pwc_articles.getDataFrame())
|
widgets/body.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
|
|
|
|
|
|
|
| 3 |
from lrt_instance import *
|
| 4 |
-
from pyecharts.charts import Bar
|
| 5 |
-
from pyecharts import options as opts
|
| 6 |
-
import streamlit.components.v1 as st_render
|
| 7 |
-
from .utils import generate_html_pyecharts
|
| 8 |
from .charts import build_bar_charts
|
| 9 |
|
| 10 |
def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
|
| 11 |
with st.spinner('Searching...'):
|
| 12 |
paperInGeneral = st.empty() # paper的大概
|
| 13 |
-
paperInGeneral_md = '''# Query Results Preview
|
| 14 |
We have found following papers for you! (displaying 5 papers for each literature platforms)
|
| 15 |
'''
|
| 16 |
if 'IEEE' in platforms:
|
|
@@ -67,28 +69,34 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
|
|
| 67 |
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
|
| 68 |
for i,plat in enumerate(platforms):
|
| 69 |
clusters, articles = next(generator)
|
| 70 |
-
st.markdown(f'''# {plat} Results''')
|
| 71 |
clusters.sort()
|
| 72 |
|
| 73 |
-
st.markdown(f'''## Clusters Overview''')
|
| 74 |
-
st.markdown(f'''
|
| 75 |
st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
|
| 76 |
-
'''
|
| 77 |
-
plot using pyecharts
|
| 78 |
-
bar = (
|
| 79 |
-
Bar()
|
| 80 |
-
.add_xaxis([f'Cluster {i + 1}' for i in range(len(clusters))])
|
| 81 |
-
.add_yaxis("number of papers", [len(c) for c in clusters])
|
| 82 |
-
.add_yaxis("number of keyphrases", [len(c.get_keyphrases()) for c in clusters])
|
| 83 |
-
)
|
| 84 |
-
html = generate_html_pyecharts(bar, 'tmp.html')
|
| 85 |
-
st_render.html(html, height=500, width=1000)
|
| 86 |
-
'''
|
| 87 |
st.bokeh_chart(build_bar_charts(
|
| 88 |
x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
|
| 89 |
y_names= ['Number of Papers', 'Number of Keyphrases'],
|
| 90 |
y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
|
| 91 |
))
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
|
| 3 |
+
from lrt.clustering.clusters import SingleCluster
|
| 4 |
+
from lrt import ArticleList
|
| 5 |
from lrt_instance import *
|
| 6 |
+
# from pyecharts.charts import Bar
|
| 7 |
+
# from pyecharts import options as opts
|
| 8 |
+
# import streamlit.components.v1 as st_render
|
| 9 |
+
# from .utils import generate_html_pyecharts
|
| 10 |
from .charts import build_bar_charts
|
| 11 |
|
| 12 |
def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
|
| 13 |
with st.spinner('Searching...'):
|
| 14 |
paperInGeneral = st.empty() # paper的大概
|
| 15 |
+
paperInGeneral_md = '''# 0 Query Results Preview
|
| 16 |
We have found following papers for you! (displaying 5 papers for each literature platforms)
|
| 17 |
'''
|
| 18 |
if 'IEEE' in platforms:
|
|
|
|
| 69 |
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
|
| 70 |
for i,plat in enumerate(platforms):
|
| 71 |
clusters, articles = next(generator)
|
| 72 |
+
st.markdown(f'''# {i+1} {plat} Results''')
|
| 73 |
clusters.sort()
|
| 74 |
|
| 75 |
+
st.markdown(f'''## {i+1}.1 Clusters Overview''')
|
| 76 |
+
st.markdown(f'''In this section we show the overview of the clusters, more specifically,''')
|
| 77 |
st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
st.bokeh_chart(build_bar_charts(
|
| 79 |
x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
|
| 80 |
y_names= ['Number of Papers', 'Number of Keyphrases'],
|
| 81 |
y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
|
| 82 |
))
|
| 83 |
|
| 84 |
+
st.markdown(f'''## {i+1}.2 Cluster Details''')
|
| 85 |
+
st.markdown(f'''In this section we show the details of each cluster, including''')
|
| 86 |
+
st.markdown(f'''\n- the article information in the cluster\n- the keyphrases of the cluster''')
|
| 87 |
+
for j,cluster in enumerate(clusters):
|
| 88 |
+
assert isinstance(cluster,SingleCluster) #TODO: remove this line
|
| 89 |
+
ids = cluster.elements()
|
| 90 |
+
articles_in_cluster = ArticleList([articles[id] for id in ids])
|
| 91 |
+
st.markdown(f'''**Cluster {j + 1}**''')
|
| 92 |
+
st.dataframe(articles_in_cluster.getDataFrame())
|
| 93 |
+
st.markdown(f'''The top 5 keyphrases of this cluster are:''')
|
| 94 |
+
md = ''
|
| 95 |
+
for keyphrase in cluster.top_5_keyphrases:
|
| 96 |
+
md += f'''- `{keyphrase}`\n'''
|
| 97 |
+
st.markdown(md)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
|
| 101 |
|
| 102 |
|