text-matching / app.py
Keane Moraes
adding prompts and generation
1981c78
raw
history blame
1.52 kB
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
# Streamlit page: upload two documents, extract topics from each, cluster the
# two texts, and render the generated insights. Streamlit re-executes this
# script from the top on every interaction, so everything below the uploaders
# is guarded on both files being present.
st.title("Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")

st.title("Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

# Per-run containers: `topics` maps 'insight1'/'insight2' to the
# [keywords, concepts] pair returned by TopicModelling.generate_topics();
# `results` holds whatever utils.generate_insights() returns.
topics = {}
results = {}

if file1 is not None and file2 is not None:
    try:
        # getvalue() returns the whole upload regardless of the buffer's
        # current read position, whereas read() yields nothing once the
        # buffer has already been consumed.
        input_text1 = file1.getvalue().decode("utf-8")
        input_text2 = file2.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # Show a readable message instead of a traceback and end this run.
        st.error("Could not decode the uploaded files; please upload UTF-8 encoded text.")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]

        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]
    st.success('Done!')

    with st.spinner("Flux capacitor is fluxing..."):
        embedder = utils.load_model()
        clustered = utils.cluster_based_on_topics(embedder, cleaned_text1, cleaned_text2)
    st.success("Done!")

    with st.spinner("Polishing up"):
        results = utils.generate_insights(
            topics,
            file1.name,
            file2.name,
            cleaned_text1,
            cleaned_text2,
            clustered,
        )
    st.write(results)
    st.success("Done!")