Spaces:
Runtime error
Runtime error
File size: 3,719 Bytes
95ba32b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import streamlit as st
import difflib
import spacy
# ``st.cache`` is the deprecated caching API; ``st.cache_resource`` is its
# replacement for global, unserialisable objects such as ML models. Fall back
# to the legacy decorator only on Streamlit releases that predate it.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Load the spaCy ``en_core_web_md`` pipeline.

    The function is wrapped in Streamlit's resource cache so that script
    reruns reuse the already-loaded pipeline instead of loading it again.

    Returns:
        The object returned by ``spacy.load('en_core_web_md')``.
    """
    return spacy.load('en_core_web_md')
## Page-level configuration (title, icon, layout and the "hamburger" menu)
# NOTE(review): the 'Get Help' address looks like it was redacted when this
# file was scraped — restore the real contact address before deploying.
_MENU_ITEMS = {
    'Get Help': 'mailto:[email protected]',
    'Report a bug': None,
    'About': "## This a demo showcasing different Legal AI Actions",
}

st.set_page_config(
    page_title="Compare Demo",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)
# Page heading followed by a short description and a usage hint.
st.title('π Compare Demo')

_INTRO_TEXT = """
This demo shows how AI can be used to compare passages of text.
"""
st.write(_INTRO_TEXT)

_USAGE_HINT = "**π Enter two passages of text on the left** and hit the button **Compare** to see the demo in action"
st.write(_USAGE_HINT)

# Fetch the NLP pipeline up front, showing a spinner while load_model() runs.
with st.spinner('βοΈ Loading model...'):
    nlp = load_model()
# Default passages pre-filled into the two sidebar text areas.
EXAMPLE_TEXT_1 = """This Agreement shall be governed by and interpreted under the laws of the
State of Delaware without regard to its conflicts of law provisions."""
EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""

# Sidebar widgets: two editable passages plus the button that triggers the
# comparison further down the script.
_sidebar = st.sidebar
text_1 = _sidebar.text_area(
    label='Enter a passage of text',
    value=EXAMPLE_TEXT_1,
    height=150,
    key='input1',
)
text_2 = _sidebar.text_area(
    label='Enter a second passage of text',
    value=EXAMPLE_TEXT_2,
    height=150,
    key='input2',
)
button = _sidebar.button(
    label='Compare',
    type='primary',
    use_container_width=True,
)
def get_tokens(doc):
    """Return the ``lower`` attribute of every token in *doc*, in order.

    Args:
        doc: An iterable of token-like objects exposing a ``lower`` attribute.

    Returns:
        A list with one entry per token.
    """
    # NOTE(review): on spaCy tokens ``lower`` is presumably the integer ID of
    # the lower-cased form (``lower_`` being the string) — confirm. Either
    # one supports the equality comparison used for sequence matching.
    lowered_forms = [tok.lower for tok in doc]
    return lowered_forms
def add_md_color(text, match):
    """Wrap *text* in a ``:color[...]`` markdown directive.

    Args:
        text: The text to colour.
        match: Truthy selects green, falsy selects red.

    Returns:
        The string ``:green[text]`` or ``:red[text]``.
    """
    palette = {True: 'green', False: 'red'}
    color = palette[bool(match)]
    return f":{color}[{text}]"
def create_str_output(doc, matching_idxs):
    """Render *doc* as a markdown string with each token colour-coded.

    Args:
        doc: An iterable of tokens exposing ``i`` (the token's index) and
            ``text``.
        matching_idxs: An iterable of ``(start, end)`` pairs; each pair marks
            the half-open index range ``start <= i < end`` as matched.

    Returns:
        The tokens' texts joined by single spaces, each one wrapped by
        ``add_md_color`` — matched tokens flagged as a match, the rest not.
    """
    # Expand the ranges into a set once, so the per-token check is a single
    # lookup instead of a scan over every (start, end) pair.
    matched_positions = set()
    for start, end in matching_idxs:
        matched_positions.update(range(start, end))

    return ' '.join(
        add_md_color(token.text, token.i in matched_positions)
        for token in doc
    )
# Run the comparison only on the rerun triggered by the "Compare" button.
# Blank passages are rejected first: they would otherwise yield misleading
# similarity figures computed from empty documents.
if button and not (text_1.strip() and text_2.strip()):
    st.warning('Please enter text in both passages before comparing.')
elif button:
    # NOTE(review): the original indentation was lost; only the two nlp()
    # calls are assumed to sit inside the spinner — confirm.
    with st.spinner('βοΈ Comparing Texts...'):
        doc_1 = nlp(text_1)
        doc_2 = nlp(text_2)

    st.header('π§ͺ Comparison')
    st.markdown('We can highlight the :green[similarities] and :red[differences] across the two texts')
    left_col, right_col = st.columns(2)

    # One token-level matcher serves both the highlighting below and the
    # "Textual Similarity" metric, so it is built exactly once.
    token_matcher = difflib.SequenceMatcher(
        None, get_tokens(doc_1), get_tokens(doc_2)
    )

    # Each matching block (a, b, n) says doc_1[a:a+n] equals doc_2[b:b+n];
    # split it into a half-open index range per document.
    doc_1_matching_idxs = []
    doc_2_matching_idxs = []
    for a, b, n in token_matcher.get_matching_blocks():
        doc_1_matching_idxs.append((a, a + n))
        doc_2_matching_idxs.append((b, b + n))

    with left_col:
        st.markdown(create_str_output(doc_1, doc_1_matching_idxs))
    with right_col:
        st.markdown(create_str_output(doc_2, doc_2_matching_idxs))

    wording_col, grammar_col, meaning_col = st.columns(3)

    with wording_col:
        # Similarity of the two token sequences.
        st.subheader('π Textual Similarity')
        st.markdown('We can measure the similarity based on the *wording* of the two texts.')
        st.metric(label='Textual Similarity', value=f"{token_matcher.ratio() * 100:.1f}%")

    with grammar_col:
        st.subheader('π Linguistic Similarity')
        st.markdown(
            'We can measure the similarity based on the *linguistic features* of the two texts.')
        # Same sequence matching, applied to the tokens' ``pos_`` tags
        # instead of their word forms.
        postags_1 = [token.pos_ for token in doc_1]
        postags_2 = [token.pos_ for token in doc_2]
        pos_matcher = difflib.SequenceMatcher(None, postags_1, postags_2)
        st.metric(label='Linguistic Similarity', value=f"{pos_matcher.ratio() * 100:.1f}%")

    with meaning_col:
        st.subheader('π Semantic Similarity')
        st.markdown('We can measure the similarity based on the *meaning* of the two texts.')
        # Score comes from the pipeline's own document-to-document similarity.
        st.metric(label='Semantic Similarity', value=f"{doc_1.similarity(doc_2) * 100:.1f}%")
|