"""Streamlit page that compares two uploaded documents.

The user uploads two Markdown/text files. Once both are present the script:

1. decodes them as UTF-8 and cleans them with ``mdforest.clean_markdown``;
2. runs ``TopicModelling(...).generate_topics()`` on each cleaned text;
3. loads a model via ``utils.load_model`` and calls
   ``utils.cluster_based_on_topics`` on the two cleaned texts;
4. passes everything to ``utils.generate_insights`` and renders the result.
"""
import streamlit as st
from topics import TopicModelling
import mdforest
import utils


def _decode_upload(uploaded_file):
    """Return the uploaded file's content as text, or None if not UTF-8.

    ``getvalue()`` is used instead of ``read()`` because it returns the whole
    buffer regardless of the current stream position, so the result does not
    depend on whether the file object has been read before.
    """
    try:
        return uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        return None


st.title("Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")

st.title("Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

topics = {}
results = {}

# Nothing to do until both documents have been provided.
if file1 is not None and file2 is not None:
    input_text1 = _decode_upload(file1)
    input_text2 = _decode_upload(file2)
    if input_text1 is None or input_text2 is None:
        # Report the problem in the page instead of crashing with a traceback.
        st.error("Both documents must be UTF-8 encoded text files.")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    # NOTE(review): the original file had lost its line structure; each
    # st.success(...) is placed after its spinner block — confirm this matches
    # the intended layout.
    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]

        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]
    st.success('Done!')

    with st.spinner("Flux capacitor is fluxing..."):
        embedder = utils.load_model()
        clustered = utils.cluster_based_on_topics(
            embedder, cleaned_text1, cleaned_text2
        )
        # Debug output; goes to the server console, not the page.
        print(clustered)
    st.success("Done!")

    with st.spinner("Polishing up"):
        results = utils.generate_insights(
            topics,
            file1.name,
            file2.name,
            cleaned_text1,
            cleaned_text2,
            clustered,
        )
    st.write(results)
    st.success("Done!")