File size: 4,515 Bytes
bc565d4
 
 
 
 
 
 
 
 
c0dee52
 
bc565d4
 
 
 
c0dee52
bc565d4
c0dee52
 
 
 
 
 
bc565d4
 
c0dee52
 
 
 
bc565d4
 
c0dee52
 
 
bc565d4
 
c0dee52
 
 
 
 
 
 
 
 
bc565d4
 
c0dee52
 
 
 
 
 
 
 
bc565d4
 
c0dee52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc565d4
 
 
 
 
 
 
 
 
 
c0dee52
bc565d4
 
 
 
 
 
 
 
c0dee52
 
bc565d4
 
 
c0dee52
 
 
bc565d4
c0dee52
 
 
 
 
 
 
 
 
 
 
 
bc565d4
c0dee52
 
 
 
bc565d4
c0dee52
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr

# Name of the spaCy pipeline used for parsing, entities, tokens and spans.
DEFAULT_MODEL = "en_core_web_sm"
# Sample sentence pre-filled in the shared input textbox.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attributes offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels offered (and pre-selected) in the "Entity" tab.
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
                'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']

# Both pipelines are loaded once at import time and shared by every handler.
# The default pipeline is referenced through DEFAULT_MODEL so the name lives
# in a single place.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline, used only by vectors() for similarity scoring.
nlp2 = spacy.load("en_core_web_md")


def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        col_punct: Forwarded to displaCy as the ``collapse_punct`` option.
        col_phrase: Forwarded to displaCy as the ``collapse_phrases`` option.
        compact: Forwarded to displaCy as the ``compact`` option.

    Returns:
        Whatever ``displacy.render`` produces for the ``"dep"`` style.
    """
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(nlp(text), style="dep", options=render_options)


def entity(text, ents):
    """Render the named entities found in ``text`` with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        ents: Entity labels forwarded to displaCy as the ``ents`` option.

    Returns:
        Whatever ``displacy.render`` produces for the ``"ent"`` style.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})


def text(default):
    """Return ``default`` when it is truthy, otherwise ``None``."""
    return default or None


def token(text, attributes):
    """Build a table of the requested attributes for every token in ``text``.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        attributes: Attribute names looked up on each token via ``getattr``.

    Returns:
        A list with one row per token; each row holds the attribute values
        in the same order as ``attributes``.
    """
    return [
        [getattr(tok, name) for name in attributes]
        for tok in nlp(text)
    ]


def vectors(text):
    """Score the similarity of two randomly picked pieces of ``text``.

    The candidates are the noun chunks of the parsed text plus every token
    that is not a stop word and whose part of speech is neither ``PUNCT``
    nor ``PROPN``.

    Args:
        text: Raw text, parsed with the module-level ``nlp2`` pipeline.

    Returns:
        A ``(score, first_text, second_text)`` tuple, where ``score`` is the
        similarity rounded to two decimals. When the text yields no
        candidates at all, ``(0.0, "", "")`` is returned instead of raising.
    """
    doc = nlp2(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )

    if not candidates:
        # Nothing to compare (e.g. empty input); random selection would raise.
        return 0.0, "", ""

    if len(candidates) == 1:
        # Only one candidate exists, so it can only be compared with itself.
        first = second = candidates[0]
    else:
        # Sample without replacement so a candidate is never paired with
        # itself, which would always give a trivial score.
        first, second = random.sample(candidates, 2)

    return round(first.similarity(second), 2), first.text, second.text


def _find_token_span(doc, phrase):
    """Locate the first run of consecutive tokens in ``doc`` spelling ``phrase``.

    Returns:
        ``(start, end)`` token indices (``end`` exclusive), or ``None`` when
        the phrase is empty or does not occur in the document.
    """
    # Tokenize the phrase with the pipeline's own tokenizer so punctuation is
    # split the same way as in the document; whitespace tokens are ignored.
    wanted = [tok.text for tok in nlp.make_doc(phrase or "") if not tok.is_space]
    if not wanted:
        return None

    doc_words = [tok.text for tok in doc]
    width = len(wanted)
    for start in range(len(doc_words) - width + 1):
        if doc_words[start:start + width] == wanted:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2):
    """Highlight up to two labelled spans in ``text`` with displaCy.

    Each phrase is matched as a contiguous token sequence, using its first
    occurrence in the text. A phrase that cannot be found is skipped rather
    than turned into an empty or inverted span.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        span1: Phrase to highlight as the first span.
        span2: Phrase to highlight as the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        Whatever ``displacy.render`` produces for the ``"span"`` style.
    """
    doc = nlp(text)

    labelled = []
    for phrase, label in ((span1, label1), (span2, label2)):
        bounds = _find_token_span(doc, phrase)
        if bounds is not None:
            start, end = bounds
            labelled.append(Span(doc, start, end, label))

    # The spans are stored under the "sc" key, as in the original code.
    doc.spans["sc"] = labelled

    return displacy.render(doc, style="span")


with gr.Blocks() as demo:
    # One shared input box; every tab's handler reads its text from here.
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)

    with gr.Tabs():
        # Dependency parse visualisation and its displaCy options.
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")
            depen_button.click(
                dependency,
                inputs=[text_input, col_punct, col_phrase, compact],
                outputs=depen_output,
            )

        # Named-entity visualisation, filtered by the ticked labels.
        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")
            entity_button.click(
                entity,
                inputs=[text_input, entity_input],
                outputs=entity_output,
            )

        # Per-token attribute table.
        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")
            tok_button.click(
                token,
                inputs=[text_input, tok_input],
                outputs=tok_output,
            )

        # Similarity between two randomly chosen pieces of the text.
        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")
            sim_button.click(
                vectors,
                inputs=[text_input],
                outputs=[sim_output, sim_text1, sim_text2],
            )

        # Two user-defined, labelled spans highlighted in the text.
        with gr.TabItem("Spans"):
            span1 = gr.Textbox(value="David Bowie", label="Span 1")
            label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
            span2 = gr.Textbox(value="David", label="Span 2")
            label2 = gr.Textbox(value="First Name", label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")
            span_button.click(
                span,
                inputs=[text_input, span1, span2, label1, label2],
                outputs=span_output,
            )

demo.launch()