import streamlit as st
from topics import TopicModelling
import mdforest
import utils

st.title("Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")
st.title("Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

topics = {}
results = {}

if file1 is not None and file2 is not None:

    # Read both uploads and strip markdown formatting before analysis.
    input_text1 = file1.read().decode("utf-8")
    input_text2 = file2.read().decode("utf-8")

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    with st.spinner("Generating insights..."):
        # Extract keywords and concepts from each document.
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics["insight1"] = [keywords1, concepts1]
        keywords2, concepts2 = insight2.generate_topics()
        topics["insight2"] = [keywords2, concepts2]
        st.success("Done!")

    with st.spinner("Flux capacitor is fluxing..."):
        # Embed both documents and cluster related passages across them.
        embedder = utils.load_model()
        clustered = utils.cluster_based_on_topics(embedder, cleaned_text1, cleaned_text2)
        print(clustered)  # debug output to the server console
        st.success("Done!")

    with st.spinner("Polishing up"):
        # Combine topics and clusters into the final report shown to the user.
        results = utils.generate_insights(
            topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clustered
        )
        st.write(results)
        st.success("Done!")