import streamlit as st
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
from lrt.clustering.clusters import SingleCluster
from lrt.clustering.config import Configuration
from lrt import ArticleList, LiteratureResearchTool
from lrt_instance import *
# from pyecharts.charts import Bar
# from pyecharts import options as opts
# import streamlit.components.v1 as st_render
# from .utils import generate_html_pyecharts
from .charts import build_bar_charts

def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
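    """Fetch a quick preview of search results from the selected platforms.

    For each platform in ``platforms``, query up to ``num_papers`` results for
    ``query_input`` (restricted to ``start_year``-``end_year`` where the platform API
    supports it) and render the first ``num_papers_preview`` titles and publication
    years as Markdown tables inside a single placeholder.
    """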
    with st.spinner('Searching...'):
        paperInGeneral = st.empty()  # placeholder for the paper overview
        paperInGeneral_md = '''# 0 Query Results Preview
We have found the following papers for you! (displaying 5 papers for each literature platform)
'''
        if 'IEEE' in platforms:
            paperInGeneral_md += '''## IEEE
| ID| Paper Title | Publication Year |
| -------- | -------- | -------- |
'''
            IEEEQuery.__setup_api_key__('vpd9yy325enruv27zj2d353e')
            ieee = IEEEQuery.query(query_input,start_year,end_year,num_papers)
            num_papers_preview = min(len(ieee), num_papers_preview)
            for i in range(num_papers_preview):
                title = str(ieee[i]['title']).replace('\n', ' ')
                publication_year = str(ieee[i]['publication_year']).replace('\n', ' ')
                paperInGeneral_md += f'''|{i + 1}|{title}|{publication_year}|\n'''
        if 'Arxiv' in platforms:
            paperInGeneral_md += '''
## Arxiv
| ID| Paper Title | Publication Year |
| -------- | -------- | -------- |
'''
            arxiv = ArxivQuery.query(query_input, max_results=num_papers)
            num_papers_preview = min(len(arxiv), num_papers_preview)
            for i in range(num_papers_preview):
                title = str(arxiv[i]['title']).replace('\n', ' ')
                publication_year = str(arxiv[i]['published']).replace('\n', ' ')
                paperInGeneral_md += f'''|{i + 1}|{title}|{publication_year}|\n'''
        if 'Paper with Code' in platforms:
            paperInGeneral_md += '''
## Papers with Code
| ID| Paper Title | Publication Year |
| -------- | -------- | -------- |
'''
            pwc = PaperWithCodeQuery.query(query_input, items_per_page=num_papers)
            num_papers_preview = min(len(pwc), num_papers_preview)
            for i in range(num_papers_preview):
                title = str(pwc[i]['title']).replace('\n', ' ')
                publication_year = str(pwc[i]['published']).replace('\n', ' ')
                paperInGeneral_md += f'''|{i + 1}|{title}|{publication_year}|\n'''

        paperInGeneral.markdown(paperInGeneral_md)

def render_body(platforms, num_papers, num_papers_preview, query_input, show_preview: bool, start_year, end_year, clustering_params: dict):
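    """Render the main results body of the page.

    Echoes the query, optionally shows a raw search preview, then for each selected
    platform renders the clustering results: a bar-chart overview (papers and
    keyphrases per cluster) followed by a per-cluster article table and its top
    keyphrases.
    """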

    tmp = st.empty()
    if query_input != '':
        tmp.markdown(f'You entered query: `{query_input}`')

        # preview
        if show_preview:
            __preview__(platforms, num_papers, num_papers_preview, query_input, start_year, end_year)


        # LRT results
        # baseline: no dimension reduction, use the pre-built instance from lrt_instance
        if clustering_params['dimension_reduction'] == 'none':
            model = baseline_lrt
        else:
            config = Configuration(
                plm='all-mpnet-base-v2',
                dimension_reduction=clustering_params['dimension_reduction'],
                clustering='kmeans-euclidean',
                keywords_extraction='keyphrase-transformer'
            )
            model = LiteratureResearchTool(config)

        generator = model(query_input, num_papers, start_year, end_year, max_k=clustering_params['max_k'], platforms=platforms)
        for i,plat in enumerate(platforms):
            clusters, articles = next(generator)
            st.markdown(f'''# {i+1} {plat} Results''')
            clusters.sort()

            st.markdown(f'''## {i+1}.1 Clusters Overview''')
            st.markdown(f'''In this section we show an overview of the clusters, more specifically:''')
            st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
            st.bokeh_chart(build_bar_charts(
                x_range=[f'Cluster {k + 1}' for k in range(len(clusters))],
                y_names=['Number of Papers', 'Number of Keyphrases'],
                y_data=[[len(c) for c in clusters], [len(c.get_keyphrases()) for c in clusters]]
            ))

            st.markdown(f'''## {i+1}.2 Cluster Details''')
            st.markdown(f'''In this section we show the details of each cluster, including:''')
            st.markdown(f'''\n- the article information in the cluster\n- the keyphrases of the cluster''')
            for j,cluster in enumerate(clusters):
                assert isinstance(cluster,SingleCluster) #TODO: remove this line
                ids = cluster.elements()
                articles_in_cluster = ArticleList([articles[id] for id in ids])
                st.markdown(f'''**Cluster {j + 1}**''')
                st.dataframe(articles_in_cluster.getDataFrame())
                st.markdown(f'''The top 5 keyphrases of this cluster are:''')
                md = ''
                for keyphrase in cluster.top_5_keyphrases:
                    md += f'''- `{keyphrase}`\n'''
                st.markdown(md)
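
# Illustrative usage (a sketch only; the real call site and widget-driven values live in
# the main app module, and every value below is an assumption, not the app's default):
#
#   render_body(
#       platforms=['IEEE', 'Arxiv', 'Paper with Code'],
#       num_papers=50,
#       num_papers_preview=5,
#       query_input='machine learning',
#       show_preview=True,
#       start_year=2018,
#       end_year=2022,
#       clustering_params={'dimension_reduction': 'none', 'max_k': 10},
#   )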