Spaces:

lordvader31
/

text-matching

Build error

text-matching / app.py

Keane Moraes

updates on quality of life and output changes

ca564a1 about 2 years ago

2.62 kB

	import streamlit as st
	from topics import TopicModelling
	import mdforest
	import utils
	import os

	# Page header: app title followed by a two-column introduction.
	st.title("Welcome to Embeddr")

	# Three columns with relative widths 30 : 5 : 20. Nothing is rendered into
	# `mid`; it only acts as a spacer between the text and the sign-up column.
	col1, mid, col2 = st.columns([30,5,20])
	with col1:
	    # Left column: description of what the demo does.
	    st.markdown("This is a demo of _one of the many_ use cases for an embedding of all your notes. This application lets you find common ideas between any two notes.")
	    st.markdown("You can upload two markdown files and the application will find the common ideas between them. It will generate insights based on the common ideas.")
	    st.markdown("I will be building a better embedding model soon. Stay tuned for updates. This is just a demo of what is possible with a good embedding model.")
	with col2:
	    # Right column: sign-up link and its QR code.
	    # NOTE(review): the QR code is assumed to belong in this column next to
	    # the link it accompanies - confirm against the intended layout.
	    st.markdown("#### [Sign up for updates](https://embeddr.my.canva.site/)")
	    st.image("media/qrcode.png")

	def _upload_slot(ordinal, key):
	    """Render a heading plus a file uploader and return the uploader's value.

	    `ordinal` is the word inserted into the heading ("first", "second");
	    `key` is the widget key passed to `st.file_uploader`. The return value is
	    whatever `st.file_uploader` returns (compared against None further down
	    in this script to detect a missing upload).
	    """
	    st.markdown(f"### Drop the {ordinal} document")
	    return st.file_uploader("Upload a file", type=["md", "txt"], key=key)


	# The two documents to compare; both accept .md and .txt files.
	file1 = _upload_slot("first", key="first")
	file2 = _upload_slot("second", key="second")

	# Containers filled in further down once both documents have been uploaded:
	# `topics` receives the per-document keyword/concept lists and `results` is
	# rebound to the output of utils.generate_insights().
	topics = {}
	results = {}

	# Shared model objects handed to utils.cluster_based_on_topics() below.
	embedder = utils.load_model()
	nlp = utils.load_nlp()

	# Make sure the working directory exists. exist_ok=True replaces the previous
	# exists()-then-mkdir() pair, which could raise FileExistsError when two
	# sessions started at the same time and both passed the check.
	# NOTE(review): nothing in this script reads ./prompter/ - presumably one of
	# the helper modules writes there; confirm.
	os.makedirs("./prompter/", exist_ok=True)

	# Main pipeline: runs only once both documents have been uploaded.
	if file1 is not None and file2 is not None:

	    # getvalue() returns the complete upload regardless of the current stream
	    # position, so the text is never truncated by an earlier read.
	    try:
	        input_text1 = file1.getvalue().decode("utf-8")
	        input_text2 = file2.getvalue().decode("utf-8")
	    except UnicodeDecodeError:
	        # Report an unreadable upload in the page instead of a raw traceback.
	        st.error("Could not read the uploaded files: both documents must be UTF-8 encoded text.")
	        st.stop()

	    cleaned_text1 = mdforest.clean_markdown(input_text1)
	    cleaned_text2 = mdforest.clean_markdown(input_text2)

	    st.title("Generating insights")

	    with st.spinner('Generating insights...'):

	        insight1 = TopicModelling(cleaned_text1)
	        insight2 = TopicModelling(cleaned_text2)

	        # generate_topics() yields a (keywords, concepts) pair per document;
	        # both are stored under a per-document key for generate_insights().
	        keywords1, concepts1 = insight1.generate_topics()
	        topics['insight1'] = [keywords1, concepts1]
	        keywords2, concepts2 = insight2.generate_topics()
	        topics['insight2'] = [keywords2, concepts2]

	    with st.spinner("Flux capacitor is fluxing..."):
	        clustered = utils.cluster_based_on_topics(nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3)

	    with st.spinner("Polishing up"):
	        results = utils.generate_insights(topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clustered)
	    # NOTE(review): shown once the last spinner has closed; the original
	    # nesting of this call is not recoverable - confirm.
	    st.success("Done!")

	    # NOTE(review): the results section is kept inside the upload check so
	    # the headings do not appear before there is anything to show - confirm.
	    st.title("Insights generated")
	    st.markdown("### The following insights are common to both documents.")
	    # Each result is indexed by "name", "description" and "concepts".
	    for result in results:
	        with st.expander(result["name"]):
	            st.write(result["description"])
	            st.markdown("Related Concepts:")
	            for insight in result["concepts"]:
	                st.markdown(f" - {insight}")