Spaces:

lordvader31
/

text-matching

Build error

text-matching / app.py

Keane Moraes

speeding up the clustering

9a105fd over 2 years ago

1.6 kB

	import streamlit as st
	from topics import TopicModelling
	import mdforest
	import utils

	# Streamlit page: the user uploads two markdown/text documents; once both
	# are present, topics are generated for each, the two texts are clustered
	# together, and the resulting insights are written to the page.
	st.title("Drop the first document")
	file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")
	st.title("Drop the second document")
	file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

	# Filled below with one entry per document ('insight1' / 'insight2'),
	# each holding the [keywords, concepts] pair from generate_topics().
	topics = {}
	# Stays empty until both uploads have been processed.
	results = {}

	# file_uploader returns None until a file is provided, so the whole
	# pipeline is gated on both uploads being available.
	if file1 is not None and file2 is not None:

	    # getvalue() returns the complete upload regardless of the stream's
	    # current read position (UploadedFile is documented as a BytesIO
	    # subclass), so the text is never empty because of an earlier read.
	    input_text1 = file1.getvalue().decode("utf-8")
	    input_text2 = file2.getvalue().decode("utf-8")

	    cleaned_text1 = mdforest.clean_markdown(input_text1)
	    cleaned_text2 = mdforest.clean_markdown(input_text2)

	    st.title("Generating insights")

	    # Step 1: per-document topic extraction.
	    with st.spinner('Generating insights...'):

	        insight1 = TopicModelling(cleaned_text1)
	        insight2 = TopicModelling(cleaned_text2)

	        keywords1, concepts1 = insight1.generate_topics()
	        topics['insight1'] = [keywords1, concepts1]
	        keywords2, concepts2 = insight2.generate_topics()
	        topics['insight2'] = [keywords2, concepts2]

	    # Step 2: load the embedding model and cluster both texts together.
	    with st.spinner("Flux capacitor is fluxing..."):
	        embedder = utils.load_model()
	        clustered = utils.cluster_based_on_topics(
	            embedder, cleaned_text1, cleaned_text2, num_clusters=5
	        )

	    # Step 3: combine topics, file names, texts and clusters into results.
	    with st.spinner("Polishing up"):
	        results = utils.generate_insights(
	            topics,
	            file1.name,
	            file2.name,
	            cleaned_text1,
	            cleaned_text2,
	            clustered,
	        )
	        st.success("Done!")

	    st.title("Insights generated")

	    # One collapsible section per generated insight.
	    for result in results:
	        with st.expander("See explanation"):
	            st.write(result)