Spaces:

spark-nlp
/

coreference-resolution

Running

App Files Files Community

coreference-resolution / Demo.py

abdullahmubeen10

Upload 2 files

38c64a9 verified about 1 year ago

raw

history blame

4.49 kB

	import streamlit as st
	import sparknlp
	import os
	import pandas as pd

	from sparknlp.base import *
	from sparknlp.annotator import *
	from pyspark.ml import Pipeline
	from sparknlp.pretrained import PretrainedPipeline

	# Page-wide Streamlit settings: wide layout, page title, default sidebar state.
	st.set_page_config(
	    page_title="Spark NLP Demos App",
	    layout="wide",
	    initial_sidebar_state="auto",
	)

	# Custom CSS for the "main-title" heading and for text inside ".section"
	# containers. It is injected as raw HTML, hence unsafe_allow_html=True.
	_PAGE_CSS = """
	<style>
	.main-title {
	font-size: 36px;
	color: #4A90E2;
	font-weight: bold;
	text-align: center;
	}
	.section p, .section ul {
	color: #666666;
	}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	@st.cache_resource
	def init_spark():
	    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

	    The function is decorated with ``st.cache_resource``, so Streamlit caches
	    the returned session object rather than calling ``sparknlp.start()`` again
	    on every invocation.
	    """
	    session = sparknlp.start()
	    return session

	@st.cache_resource
	def create_pipeline(model_name="spanbert_base_coref"):
	    """Build the unfitted Spark ML pipeline used for coreference resolution.

	    Stages, in order: ``DocumentAssembler`` -> ``SentenceDetector`` ->
	    ``Tokenizer`` -> ``SpanBertCorefModel``.

	    Args:
	        model_name: Name of the pretrained SpanBERT coreference model to load.
	            Defaults to ``"spanbert_base_coref"``, the value that used to be
	            hard-coded, so existing zero-argument calls behave as before.

	    Returns:
	        A ``Pipeline`` whose last stage writes its annotations to the
	        ``corefs`` column.
	    """
	    # Step 1: Document assembly (raw "text" column -> "document")
	    document = (
	        DocumentAssembler()
	        .setInputCol("text")
	        .setOutputCol("document")
	    )

	    # Step 2: Sentence detection
	    sentence_detector = (
	        SentenceDetector()
	        .setInputCols(["document"])
	        .setOutputCol("sentences")
	    )

	    # Step 3: Tokenization; the listed punctuation marks are set as context
	    # characters on the tokenizer.
	    tokenizer = (
	        Tokenizer()
	        .setInputCols(["sentences"])
	        .setOutputCol("tokens")
	        .setContextChars(["(", ")", "?", "!", ".", ","])
	    )

	    # Step 4: Coreference resolution. ``pretrained`` is called on the class
	    # itself; instantiating an annotator first only to discard it is
	    # unnecessary.
	    coref_resolution = (
	        SpanBertCorefModel.pretrained(model_name)
	        .setInputCols(["sentences", "tokens"])
	        .setOutputCol("corefs")
	        .setCaseSensitive(False)
	    )

	    return Pipeline(
	        stages=[document, sentence_detector, tokenizer, coref_resolution]
	    )

	def fit_data(pipeline, data):
	    """Fit ``pipeline`` and annotate ``data`` through a ``LightPipeline``.

	    Args:
	        pipeline: Unfitted pipeline, e.g. the one built by ``create_pipeline()``.
	        data: Text to annotate; passed unchanged to ``fullAnnotate``.

	    Returns:
	        The value returned by ``LightPipeline.fullAnnotate(data)``.
	    """
	    # Take the session from the cached factory instead of relying on a
	    # module-level ``spark`` global, which only exists after the script body
	    # has assigned it. ``init_spark`` is cached, so this is the same session.
	    session = init_spark()

	    # Fit on a one-row placeholder frame (empty "text" value); the real text
	    # is fed through the LightPipeline below.
	    empty_df = session.createDataFrame([['']]).toDF('text')
	    pipeline_model = pipeline.fit(empty_df)

	    light_model = LightPipeline(pipeline_model)
	    return light_model.fullAnnotate(data)

	# Page heading, rendered as raw HTML so the "main-title" CSS class applies.
	_TITLE_HTML = '<div class="main-title">State-of-the-Art Coreference Resolution in Spark NLP</div>'
	st.markdown(_TITLE_HTML, unsafe_allow_html=True)

	# Sidebar: pretrained model selector (a single option is offered).
	model_name = st.sidebar.selectbox(
	    label="Choose the pretrained model",
	    options=['spanbert_base_coref'],
	    help="For more info about the models visit: https://sparknlp.org/models",
	)

	# Sidebar: badge image linking to the reference notebook.
	link = """
	<a href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb#L117">
	<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
	</a>
	"""
	st.sidebar.markdown('Reference notebook:')
	st.sidebar.markdown(link, unsafe_allow_html=True)

	# Built-in example texts offered in the dropdown.
	examples = [
	    "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
	    "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
	    "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
	    "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
	    "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
	    "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
	    "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating.",
	]

	selected_text = st.selectbox("Select an example", examples)
	custom_input = st.text_input("Try it with your own Sentence!")

	# Prefer the user's own text, but only when it contains something other than
	# whitespace; a blank entry falls back to the selected example instead of
	# sending an effectively empty string to the pipeline.
	text_to_analyze = custom_input if custom_input.strip() else selected_text

	st.subheader('Full example text')
	st.write(text_to_analyze)

	# Obtain the cached Spark session and pipeline, then annotate the chosen text.
	spark = init_spark()
	pipeline = create_pipeline()
	output = fit_data(pipeline, text_to_analyze)

	# Show the coreference annotations as a table.
	st.subheader("Processed output:")

	# Only the first entry of the annotation output is read.
	coref_annotations = output[0]['corefs']
	results = {
	    'tokens': [annotation.result for annotation in coref_annotations],
	    'corefs': [annotation.metadata for annotation in coref_annotations],
	}

	df = pd.DataFrame(results)
	df.index += 1  # number the rows from 1 instead of 0
	st.dataframe(df)