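"""Streamlit demo: compare two markdown/text notes and surface the ideas they share."""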
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
import os

st.title("Welcome to Embeddr")

col1, mid, col2 = st.columns([30,5,20])
with col1:
    st.markdown("This is a demo of _one of the many_ use cases for an embedding of all your notes. This application lets you find **common ideas** between any two notes.")
    st.markdown("You can upload two markdown files and the application will find the common ideas between them. It will generate insights based on the common ideas.")
    st.markdown("**I will be building a better embedding model soon.** Stay tuned for updates. This is just a demo of what is possible with a good embedding model.")
with col2:
    st.markdown("#### [Sign up for updates](https://embeddr.my.canva.site/)")
    st.image("media/qrcode.png")

st.markdown("### Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")
st.markdown("### Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

# Topic keywords/concepts per document and the generated insights
topics = {}
results = {}

# Embedding model and NLP pipeline loaded via helpers in utils
embedder = utils.load_model()
nlp = utils.load_nlp()

# Make sure the ./prompter/ working directory exists
if not os.path.exists("./prompter/"):
    os.mkdir("./prompter/")

if file1 is not None and file2 is not None:

    # Read both uploads and strip markdown formatting before analysis
    input_text1 = file1.read().decode("utf-8")
    input_text2 = file2.read().decode("utf-8")

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)
       
    st.title("Generating insights")
    
    with st.spinner('Generating insights...'):

        # Extract keywords and concepts from each document via topic modelling
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]
        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]
        
    with st.spinner("Flux capacitor is fluxing..."):
        clutered = utils.cluster_based_on_topics(nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3)
        
    with st.spinner("Polishing up"):
        results = utils.generate_insights(topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clutered)
        st.success("Done!")
        
    st.title("Insights generated")
    st.markdown("### The following insights are common to both documents.")
    for result in results:
        with st.expander(result["name"]):
            st.write(result["description"])
            st.markdown("Related Concepts:")
            for insight in result["concepts"]:
                st.markdown(f" - {insight}")