Spaces:
Runtime error
Runtime error
File size: 5,363 Bytes
bc565d4 d04bf10 bc565d4 c0dee52 bc565d4 d04bf10 c0dee52 d04bf10 c0dee52 bc565d4 d04bf10 c0dee52 bc565d4 d04bf10 c0dee52 bc565d4 d04bf10 c0dee52 bc565d4 d04bf10 c0dee52 bc565d4 d04bf10 bc565d4 c0dee52 bc565d4 d04bf10 bc565d4 c0dee52 bc565d4 c0dee52 d04bf10 c0dee52 d04bf10 c0dee52 d04bf10 c0dee52 d04bf10 bc565d4 c0dee52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr
# Model family pre-selected in the dropdown; the handlers append a size
# suffix ("_sm" or "_md") before loading it.
DEFAULT_MODEL = "en_core_web"

# Sample sentence pre-filled in the text box.
DEFAULT_TEXT = (
    "David Bowie moved to the US in 1974, initially staying in New York City "
    "before settling in Los Angeles."
)

# Token attributes offered (and ticked by default) on the "Tokens" tab.
DEFAULT_TOK_ATTR = ["idx", "text", "pos_", "lemma_", "shape_", "dep_"]

# Entity labels offered (and ticked by default) on the "Entity" tab.
DEFAULT_ENTS = [
    "CARDINAL",
    "DATE",
    "EVENT",
    "FAC",
    "GPE",
    "LANGUAGE",
    "LAW",
    "LOC",
    "MONEY",
    "NORP",
    "ORDINAL",
    "ORG",
    "PERCENT",
    "PERSON",
    "PRODUCT",
    "QUANTITY",
    "TIME",
    "WORK_OF_ART",
]
def get_all_models(path="requirements.txt"):
    """Return the model families referenced in a requirements file.

    Only lines containing ``huggingface.co`` are considered. The fifth
    ``/``-separated segment of such a line is taken as the package name
    (e.g. ``en_core_web_sm``) and reduced to its first three ``_``-separated
    parts (``en_core_web``), which drops the size suffix.

    Args:
        path: Requirements file to scan. Defaults to ``requirements.txt``
            relative to the current working directory.

    Returns:
        The distinct family names, in order of first appearance.

    Raises:
        OSError: If the file cannot be opened.
    """
    models = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            # Strip so a trailing newline never ends up inside a model name.
            segments = line.strip().split("/")
            if len(segments) < 5:
                # Mentions the host but is not a full model URL; skip it
                # rather than failing with an IndexError.
                continue
            model = "_".join(segments[4].split("_")[:3])
            if model and model not in models:
                models.append(model)
    return models
# Model families offered in the model dropdown; computed once at import time.
models = get_all_models()
def dependency(text, col_punct, col_phrase, compact, model):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Text to parse.
        col_punct: Value passed as displaCy's ``collapse_punct`` option.
        col_phrase: Value passed as displaCy's ``collapse_phrases`` option.
        compact: Value passed as displaCy's ``compact`` option.
        model: Model name without size suffix; ``"_sm"`` is appended.

    Returns:
        Whatever ``displacy.render`` produces for ``style="dep"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    render_options = {
        "compact": compact,
        "collapse_phrases": col_phrase,
        "collapse_punct": col_punct,
    }
    return displacy.render(parsed, style="dep", options=render_options)
def entity(text, ents, model):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: Text to analyse.
        ents: Entity labels passed as displaCy's ``ents`` option.
        model: Model name without size suffix; ``"_sm"`` is appended.

    Returns:
        Whatever ``displacy.render`` produces for ``style="ent"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
def token(text, attributes, model):
    """Build a table of token attributes for ``text``.

    Args:
        text: Text to tokenize.
        attributes: Names of token attributes to read, in column order.
        model: Model name without size suffix; ``"_sm"`` is appended.

    Returns:
        One row per token; each row holds the requested attribute values
        in the order given by ``attributes``.
    """
    pipeline = spacy.load(model + "_sm")
    return [
        [getattr(tok, name) for name in attributes]
        for tok in pipeline(text)
    ]
def vectors(text, model):
    """Score the similarity of two randomly chosen pieces of ``text``.

    Candidates are the noun chunks of the parsed text plus every token that
    is not a stop word and is not tagged ``PUNCT`` or ``PROPN``. Two
    different candidates are drawn without replacement, so a candidate is
    never compared with itself.

    Args:
        text: Text to analyse.
        model: Model name without size suffix; ``"_md"`` is appended.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity rounded to two decimals.

    Raises:
        ValueError: If the text yields fewer than two candidates.
    """
    nlp = spacy.load(model + "_md")
    doc = nlp(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )
    if len(candidates) < 2:
        raise ValueError(
            "Need at least two noun chunks or content words to compare."
        )
    # random.choices() draws with replacement and could pick the same item
    # twice; random.sample() guarantees two distinct candidates.
    first, second = random.sample(candidates, k=2)
    return round(first.similarity(second), 2), first.text, second.text
def _find_token_span(tokens, words):
    """Return ``(start, end)`` of the first run in ``tokens`` equal to ``words``, else ``None``."""
    width = len(words)
    if width == 0:
        return None
    for start in range(len(tokens) - width + 1):
        if tokens[start:start + width] == words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2, model):
    """Highlight two labelled phrases in ``text`` with displaCy.

    Each phrase is tokenized with the pipeline's own tokenizer and located
    as a contiguous token sequence in the parsed text; the first occurrence
    wins. A phrase that does not occur in the text is skipped instead of
    producing an empty or inverted span.

    Args:
        text: Text to parse.
        span1: First phrase to highlight.
        span2: Second phrase to highlight.
        label1: Label for the first phrase.
        label2: Label for the second phrase.
        model: Model name without size suffix; ``"_sm"`` is appended.

    Returns:
        Whatever ``displacy.render`` produces for ``style="span"``.
    """
    nlp = spacy.load(model + "_sm")
    doc = nlp(text)
    # Materialize the token texts once instead of rebuilding them per token.
    token_texts = [tok.text for tok in doc]

    found = []
    for phrase, label in ((span1, label1), (span2, label2)):
        # Tokenize the phrase like the text so punctuation splits the same way.
        words = [tok.text for tok in nlp.make_doc(phrase.strip())]
        bounds = _find_token_span(token_texts, words)
        if bounds is not None:
            found.append(Span(doc, bounds[0], bounds[1], label))

    # The spans are stored under the "sc" key before rendering.
    doc.spans["sc"] = found
    return displacy.render(doc, style="span")
# Build the UI: shared text/model inputs on top, one tab per visualisation.
with gr.Blocks() as demo:
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
    model_input = gr.Dropdown(
        choices=models,
        value=DEFAULT_MODEL,
        interactive=True,
    )

    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")

        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")

        with gr.TabItem("Tokens"):
            with gr.Column():
                tok_input = gr.CheckboxGroup(
                    DEFAULT_TOK_ATTR,
                    value=DEFAULT_TOK_ATTR,
                )
                tok_output = gr.Dataframe(
                    headers=DEFAULT_TOK_ATTR,
                    overflow_row_behaviour="paginate",
                )
                tok_button = gr.Button("Generate")

        with gr.TabItem("Similarity"):
            # The two "Chosen" boxes and the score are all filled by `vectors`.
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")

        with gr.TabItem("Spans"):
            with gr.Row():
                span1 = gr.Textbox(value="David Bowie", label="Span 1")
                label1 = gr.Textbox(value="Name", label="Label for Span 1")
            with gr.Row():
                span2 = gr.Textbox(value="David", label="Span 2")
                label2 = gr.Textbox(value="First", label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")

    # Wire each tab's button to its handler.
    depen_button.click(
        dependency,
        inputs=[text_input, col_punct, col_phrase, compact, model_input],
        outputs=depen_output,
    )
    entity_button.click(
        entity,
        inputs=[text_input, entity_input, model_input],
        outputs=entity_output,
    )
    tok_button.click(
        token,
        inputs=[text_input, tok_input, model_input],
        outputs=tok_output,
    )
    sim_button.click(
        vectors,
        inputs=[text_input, model_input],
        outputs=[sim_output, sim_text1, sim_text2],
    )
    span_button.click(
        span,
        inputs=[text_input, span1, span2, label1, label2, model_input],
        outputs=span_output,
    )

demo.launch()
|