text-matching / app.py
Keane Moraes
patch for prompter folder saving
10d9795
raw
history blame
1.67 kB
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
import os
# --- Page setup -------------------------------------------------------------
# Two upload widgets, one per document to compare. Each accepts Markdown or
# plain-text files; the distinct `key` values keep the two widgets separate.
st.title("Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")

st.title("Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

# Shared state filled in further down once both documents are uploaded:
# `topics` collects the per-document topic-modelling output and `results`
# holds whatever utils.generate_insights returns.
topics = {}
results = {}

# NOTE(review): Streamlit re-executes this script on every interaction, so
# these loaders run on each rerun. Presumably utils caches the loaded
# objects internally -- confirm, otherwise every rerun pays the load cost.
embedder = utils.load_model()
nlp = utils.load_nlp()

# Make sure the ./prompter/ folder exists. `exist_ok=True` makes this safe
# when the folder is already there, including when two sessions start at the
# same time (a separate exists() check followed by mkdir() can race and raise
# FileExistsError).
# NOTE(review): presumably the helpers in utils write into this folder --
# confirm against utils.
os.makedirs("./prompter/", exist_ok=True)
# --- Insight pipeline --------------------------------------------------------
# Runs only once both documents have been uploaded. Relies on the module-level
# names `file1`, `file2`, `topics`, `results`, `embedder` and `nlp`.
if file1 is not None and file2 is not None:
    # getvalue() returns the full upload regardless of the buffer's current
    # read position, whereas read() yields nothing if the stream was already
    # consumed.
    try:
        input_text1 = file1.getvalue().decode("utf-8")
        input_text2 = file2.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # A non-UTF-8 upload would otherwise surface as a raw traceback.
        st.error("Both documents must be UTF-8 encoded text files.")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    # Step 1: topic modelling on each document. generate_topics() returns a
    # (keywords, concepts) pair, stored per document under 'insight1' and
    # 'insight2'.
    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]

        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]

    # Step 2: cluster the two cleaned texts into three clusters.
    with st.spinner("Flux capacitor is fluxing..."):
        clustered = utils.cluster_based_on_topics(
            nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3
        )

    # Step 3: combine topics, file names, texts and clusters into the final
    # insights.
    with st.spinner("Polishing up"):
        results = utils.generate_insights(
            topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clustered
        )

    st.success("Done!")
    st.title("Insights generated")

    # Render each generated insight inside its own collapsible section.
    for result in results:
        with st.expander("See explanation"):
            st.write(result)