Spaces:
Runtime error
Runtime error
File size: 4,515 Bytes
bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 bc565d4 c0dee52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr
# Name of the pipeline loaded into ``nlp`` below.
DEFAULT_MODEL = "en_core_web_sm"

# Sample sentence pre-filled in the demo's input text box.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."

# Token attributes offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ["idx", "text", "pos_", "lemma_", "shape_", "dep_"]

# Entity labels offered (and pre-selected) in the "Entity" tab.
DEFAULT_ENTS = [
    "CARDINAL",
    "DATE",
    "EVENT",
    "FAC",
    "GPE",
    "LANGUAGE",
    "LAW",
    "LOC",
    "MONEY",
    "NORP",
    "ORDINAL",
    "ORG",
    "PERCENT",
    "PERSON",
    "PRODUCT",
    "QUANTITY",
    "TIME",
    "WORK_OF_ART",
]

# Both pipelines are loaded once at import time and shared by every request:
# ``nlp`` serves the dependency, entity, token and span handlers, while
# ``nlp2`` is the pipeline the similarity handler (``vectors``) runs.
nlp = spacy.load(DEFAULT_MODEL)
nlp2 = spacy.load("en_core_web_md")
def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of *text* with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        col_punct: Forwarded as displaCy's ``collapse_punct`` option.
        col_phrase: Forwarded as displaCy's ``collapse_phrases`` option.
        compact: Forwarded as displaCy's ``compact`` option.

    Returns:
        Whatever ``displacy.render`` produces for ``style="dep"``.
    """
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(nlp(text), style="dep", options=render_options)
def entity(text, ents):
    """Render the named entities of *text* with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        ents: Entity labels forwarded as displaCy's ``ents`` option.

    Returns:
        Whatever ``displacy.render`` produces for ``style="ent"``.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
def text(default):
    """Return *default* when it is truthy, otherwise ``None``."""
    return default if default else None
def token(text, attributes):
    """Tabulate the requested attributes for every token of *text*.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        attributes: Attribute names looked up on each token via ``getattr``.

    Returns:
        One row per token; each row holds the attribute values in the same
        order as *attributes*.
    """
    return [[getattr(tok, name) for name in attributes] for tok in nlp(text)]
def vectors(text):
    """Score the similarity of two randomly picked pieces of *text*.

    The candidates are the noun chunks of the parsed text plus every token
    that is not a stop word and whose part of speech is neither ``PUNCT``
    nor ``PROPN``. The text is parsed with the module-level ``nlp2``
    pipeline.

    Args:
        text: Raw text to draw the two candidates from.

    Returns:
        A ``(score, first_text, second_text)`` tuple where ``score`` is the
        similarity of the two picks rounded to two decimals.

    Raises:
        IndexError: If *text* yields no candidate at all.
    """
    doc = nlp2(text)
    excluded_pos = ("PUNCT", "PROPN")
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc if not tok.is_stop and tok.pos_ not in excluded_pos
    )

    if not candidates:
        # Same exception type the unguarded random pick used to raise, but
        # with a message that explains what went wrong.
        raise IndexError(
            "no noun chunks or content words found in the text to compare"
        )

    if len(candidates) >= 2:
        # Draw without replacement so the same candidate is never compared
        # with itself (which would always give a trivial score).
        first, second = random.sample(candidates, 2)
    else:
        # Only one candidate exists; comparing it with itself is all we can do.
        first = second = candidates[0]

    return round(first.similarity(second), 2), first.text, second.text
def _find_token_span(token_texts, words):
    """Return ``(start, end)`` of the first run in *token_texts* equal to *words*, or ``None``."""
    width = len(words)
    if width == 0:
        return None
    for start in range(len(token_texts) - width + 1):
        if token_texts[start:start + width] == words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2):
    """Highlight up to two labelled spans of *text* with displaCy.

    Each span phrase is split on whitespace and located as a contiguous
    sequence of token texts in the parsed document. Matching the whole
    sequence (rather than the first and last word independently) keeps the
    start and end consistent when a word occurs more than once. A phrase
    that does not occur in the text is skipped instead of being turned into
    an empty or invalid span.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        span1: Phrase for the first span.
        span2: Phrase for the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        Whatever ``displacy.render`` produces for ``style="span"``.
    """
    doc = nlp(text)
    # Materialise the token texts once instead of rebuilding the token list
    # on every loop iteration.
    token_texts = [tok.text for tok in doc]

    labelled = []
    for phrase, label in ((span1, label1), (span2, label2)):
        bounds = _find_token_span(token_texts, phrase.split())
        if bounds is not None:
            start, end = bounds
            labelled.append(Span(doc, start, end, label))

    # "sc" is the span group the original code stored its spans under and
    # that the ``style="span"`` render below is called without overriding.
    doc.spans["sc"] = labelled
    return displacy.render(doc, style="span")
# Build the demo UI: one shared text box feeding five analysis tabs.
with gr.Blocks() as demo:
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)

    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")

        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")

        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")

        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")

        with gr.TabItem("Spans"):
            span1 = gr.Textbox(value="David Bowie", label="Span 1")
            label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
            span2 = gr.Textbox(value="David", label="Span 2")
            label2 = gr.Textbox(value="First Name", label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")

    # Wire every tab's "Generate" button to its handler function.
    depen_button.click(
        fn=dependency,
        inputs=[text_input, col_punct, col_phrase, compact],
        outputs=depen_output,
    )
    entity_button.click(
        fn=entity,
        inputs=[text_input, entity_input],
        outputs=entity_output,
    )
    tok_button.click(
        fn=token,
        inputs=[text_input, tok_input],
        outputs=tok_output,
    )
    sim_button.click(
        fn=vectors,
        inputs=[text_input],
        outputs=[sim_output, sim_text1, sim_text2],
    )
    span_button.click(
        fn=span,
        inputs=[text_input, span1, span2, label1, label2],
        outputs=span_output,
    )

demo.launch()
|