Spaces:
Build error
Build error
File size: 1,520 Bytes
d9ce745 1981c78 d9ce745 4268ace 232a10d 4268ace 232a10d d9ce745 1981c78 232a10d 1981c78 28e14c5 232a10d 1981c78 232a10d 28e14c5 1981c78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
def _decode_upload(uploaded):
    """Return the complete contents of an uploaded file as UTF-8 text.

    ``getvalue()`` is used rather than ``read()`` so the result does not
    depend on the buffer's current read position (Streamlit documents the
    uploaded file object as a ``BytesIO`` subclass).

    Raises:
        UnicodeDecodeError: if the uploaded bytes are not valid UTF-8.
    """
    return uploaded.getvalue().decode("utf-8")


# Two independent upload widgets; distinct keys keep their state separate.
st.title("Drop the first document")
file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")
st.title("Drop the second document")
file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

# Per-document topic data, keyed 'insight1' / 'insight2', and the final output.
topics = {}
results = {}

# The pipeline only runs once both documents have been provided.
if file1 is not None and file2 is not None:
    try:
        input_text1 = _decode_upload(file1)
        input_text2 = _decode_upload(file2)
    except UnicodeDecodeError as err:
        # Show a readable message instead of a raw traceback, then end this
        # script run; nothing below can work without decodable text.
        st.error(f"Could not read the uploaded files as UTF-8 text: {err}")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    # Stage 1: topic extraction for each document.
    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        # generate_topics() yields a pair that is stored as [keywords, concepts].
        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]

        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]
    st.success('Done!')

    # Stage 2: load the embedding model and cluster the two cleaned texts.
    with st.spinner("Flux capacitor is fluxing..."):
        embedder = utils.load_model()
        clustered = utils.cluster_based_on_topics(embedder, cleaned_text1, cleaned_text2)
        # Debug output: goes to the server's stdout, not to the page.
        print(clustered)
    st.success("Done!")

    # Stage 3: combine topics, file names, texts and clusters into the result.
    with st.spinner("Polishing up"):
        results = utils.generate_insights(
            topics,
            file1.name,
            file2.name,
            cleaned_text1,
            cleaned_text2,
            clustered,
        )
    st.write(results)
    st.success("Done!")
|