Spaces:
Build error
Build error
File size: 2,622 Bytes
d9ce745 1981c78 10d9795 d9ce745 ca564a1 15ea0fb 232a10d 15ea0fb 232a10d d9ce745 1981c78 04b8ab3 10d9795 232a10d 1981c78 28e14c5 232a10d 1981c78 232a10d 28e14c5 1981c78 04b8ab3 1981c78 10296ed ca564a1 10296ed ca564a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
import os
# Embeddr demo page: the user uploads two markdown/text notes and the app
# displays the insights that utils.generate_insights() returns for the pair.
# NOTE(review): the nesting below was reconstructed from the statement order
# (the reviewed copy had lost its indentation) -- confirm against the original.


def _read_text(upload):
    """Return the complete content of an uploaded file decoded as UTF-8.

    ``getvalue()`` is used rather than ``read()`` so the result does not
    depend on the buffer's current read position.

    Raises:
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    return upload.getvalue().decode("utf-8")


# ---- Header: intro text in the wide column, sign-up link + QR code beside it.
st.title("Welcome to Embeddr")
# The narrow middle column is only a spacer, so its handle is discarded.
col1, _, col2 = st.columns([30, 5, 20])
with col1:
    st.markdown("This is a demo of _one of the many_ use cases for an embedding of all your notes. This application lets you find **common ideas** between any two notes.")
    st.markdown("You can upload two markdown files and the application will find the common ideas between them. It will generate insights based on the common ideas.")
    st.markdown("**I will be building a better embedding model soon.** Stay tuned for updates. This is just a demo of what is possible with a good embedding model.")
with col2:
    st.markdown("#### [Sign up for updates](https://embeddr.my.canva.site/)")
    st.image("media/qrcode.png")

# ---- Inputs: two uploaders; distinct keys keep the widgets apart even though
# they share the same label.
st.markdown("### Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")
st.markdown("### Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

topics = {}
results = {}

# Loaded on every script run, before any file is uploaded.
embedder = utils.load_model()
nlp = utils.load_nlp()

# exist_ok=True avoids the check-then-create race of exists() + mkdir().
os.makedirs("./prompter/", exist_ok=True)

# ---- Pipeline: runs only once both documents have been provided.
if file1 is not None and file2 is not None:
    try:
        input_text1 = _read_text(file1)
        input_text2 = _read_text(file2)
    except UnicodeDecodeError:
        # Report undecodable uploads in the page instead of a raw traceback.
        st.error("Could not read the uploaded files: both documents must be UTF-8 encoded text.")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        # Each document contributes a [keywords, concepts] pair.
        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]
        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]

    with st.spinner("Flux capacitor is fluxing..."):
        clustered = utils.cluster_based_on_topics(nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3)

    with st.spinner("Polishing up"):
        results = utils.generate_insights(topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clustered)

    st.success("Done!")

    # ---- Output: one expander per insight; each result is indexed by the
    # "name", "description" and "concepts" keys.
    st.title("Insights generated")
    st.markdown("### The following insights are common to both documents.")
    for result in results:
        with st.expander(result["name"]):
            st.write(result["description"])
            st.markdown("Related Concepts:")
            for insight in result["concepts"]:
                st.markdown(f" - {insight}")