File size: 5,363 Bytes
bc565d4
 
 
 
 
 
d04bf10
bc565d4
 
c0dee52
 
bc565d4
 
d04bf10
 
 
 
 
 
 
 
 
 
c0dee52
d04bf10
 
 
 
 
 
c0dee52
 
 
 
 
 
bc565d4
d04bf10
 
c0dee52
 
 
 
bc565d4
 
d04bf10
 
c0dee52
 
 
 
 
 
 
 
bc565d4
 
d04bf10
 
 
c0dee52
 
 
 
 
 
bc565d4
 
d04bf10
 
c0dee52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc565d4
 
 
 
 
 
d04bf10
 
bc565d4
 
 
c0dee52
bc565d4
 
 
 
 
 
 
 
d04bf10
 
 
 
 
bc565d4
 
c0dee52
 
 
bc565d4
c0dee52
d04bf10
 
 
 
 
 
 
 
c0dee52
 
 
 
d04bf10
c0dee52
d04bf10
 
 
 
c0dee52
 
d04bf10
bc565d4
c0dee52
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr

# Model family preselected in the model dropdown. The loader functions in this
# file append a size suffix ("_sm" or "_md") before calling spacy.load.
DEFAULT_MODEL = "en_core_web"

# Sample sentence pre-filled in the text input.
DEFAULT_TEXT = (
    "David Bowie moved to the US in 1974, initially staying in New York City "
    "before settling in Los Angeles."
)

# Token attributes offered (and initially ticked) on the "Tokens" tab; each
# entry is read from a token with getattr.
DEFAULT_TOK_ATTR = [
    "idx",
    "text",
    "pos_",
    "lemma_",
    "shape_",
    "dep_",
]

# Entity labels offered (and initially ticked) on the "Entity" tab.
DEFAULT_ENTS = [
    "CARDINAL",
    "DATE",
    "EVENT",
    "FAC",
    "GPE",
    "LANGUAGE",
    "LAW",
    "LOC",
    "MONEY",
    "NORP",
    "ORDINAL",
    "ORG",
    "PERCENT",
    "PERSON",
    "PRODUCT",
    "QUANTITY",
    "TIME",
    "WORK_OF_ART",
]


def get_all_models():
    """Collect the spaCy model families listed in ``requirements.txt``.

    Every line containing ``huggingface.co`` is treated as a model URL of the
    form ``https://huggingface.co/<org>/<package>/...``. The package name is
    the fifth ``/``-separated field, and its first three ``_``-separated
    parts (for example ``en_core_web`` from ``en_core_web_sm``) form the
    family name.

    Returns:
        list[str]: Unique family names in order of first appearance.

    Raises:
        OSError: If ``requirements.txt`` cannot be opened.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            parts = line.strip().split("/")
            # A line that merely mentions the host (a comment, a bare URL) has
            # no package segment; skip it instead of raising IndexError.
            if len(parts) < 5:
                continue
            model = "_".join(parts[4].split("_")[:3])
            # An empty segment (URL ending in "/") would yield an empty name.
            if model and model not in models:
                models.append(model)
    return models


# Model families offered in the UI dropdown; read from requirements.txt once,
# when the module is imported.
models = get_all_models()


def dependency(text, col_punct, col_phrase, compact, model):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Text to parse.
        col_punct: Forwarded to displaCy as the ``collapse_punct`` option.
        col_phrase: Forwarded to displaCy as the ``collapse_phrases`` option.
        compact: Forwarded to displaCy as the ``compact`` option.
        model: Model family name; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render`` returns for ``style="dep"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(parsed, style="dep", options=render_options)


def entity(text, ents, model):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: Text to analyse.
        ents: Entity labels forwarded to displaCy as the ``ents`` option.
        model: Model family name; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render`` returns for ``style="ent"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})


def token(text, attributes, model):
    """Build a table of token attributes for ``text``.

    Args:
        text: Text to tokenize.
        attributes: Attribute names looked up on each token with ``getattr``.
        model: Model family name; ``"_sm"`` is appended before loading.

    Returns:
        A list with one row per token; each row holds the requested
        attribute values in the order given by ``attributes``.
    """
    pipeline = spacy.load(model + "_sm")
    return [
        [getattr(tok, name) for name in attributes]
        for tok in pipeline(text)
    ]


def vectors(text, model):
    """Compare two randomly chosen pieces of ``text`` by vector similarity.

    Candidates are the document's noun chunks plus every token that is not a
    stop word and whose part of speech is neither ``PUNCT`` nor ``PROPN``.

    Args:
        text: Text to analyse.
        model: Model family name; ``"_md"`` is appended before loading.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity of the two chosen candidates rounded to two decimals.

    Raises:
        IndexError: If the text yields no candidates at all.
    """
    nlp = spacy.load(model + "_md")
    doc = nlp(text)
    candidates = list(doc.noun_chunks)
    candidates += [
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    ]
    if len(candidates) >= 2:
        # Sample without replacement so a candidate is never compared with
        # itself, which would always give a meaningless score of 1.0.
        first, second = random.sample(candidates, 2)
    else:
        # Too few candidates for a distinct pair: keep the previous behaviour
        # (self-comparison for one candidate, IndexError for none).
        first, second = random.choices(candidates, k=2)
    return round(first.similarity(second), 2), first.text, second.text


def _find_token_run(token_texts, words):
    """Return ``(start, end)`` of the first run of tokens equal to ``words``, else ``None``."""
    width = len(words)
    if width == 0:
        return None
    for start in range(len(token_texts) - width + 1):
        if token_texts[start:start + width] == words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2, model):
    """Highlight two labelled phrases in ``text`` with displaCy's span style.

    Each phrase is tokenized with the pipeline's own tokenizer and located as
    a contiguous run of tokens in the document (first occurrence). Matching
    the whole run keeps the start and end indices from the same occurrence,
    so a repeated word can no longer produce an inverted span. A phrase that
    does not occur in the text is left out rather than rendered as an empty
    span at position 0.

    Args:
        text: Text to analyse.
        span1: First phrase to highlight.
        span2: Second phrase to highlight.
        label1: Label attached to the first phrase.
        label2: Label attached to the second phrase.
        model: Model family name; ``"_sm"`` is appended before loading.

    Returns:
        Whatever ``displacy.render`` returns for ``style="span"``.
    """
    nlp = spacy.load(model + "_sm")
    doc = nlp(text)
    # Materialise the token texts once instead of rebuilding a list per token.
    token_texts = [tok.text for tok in doc]

    found = []
    for phrase, label in ((span1, label1), (span2, label2)):
        # Tokenize the phrase the same way as the document so punctuation and
        # repeated whitespace do not prevent a match.
        words = [tok.text for tok in nlp.make_doc(phrase)]
        bounds = _find_token_run(token_texts, words)
        if bounds is not None:
            found.append(Span(doc, bounds[0], bounds[1], label))

    # "sc" is the spans key this function has always written and rendered from.
    doc.spans["sc"] = found

    return displacy.render(doc, style="span")


# Build the Gradio UI: two shared inputs (text and model family) followed by
# one tab per analysis. Components are created inside nested context managers,
# so statement order and nesting define the layout.
demo = gr.Blocks()

with demo:
    # Inputs shared by every tab's handler.
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
    model_input = gr.Dropdown(
        choices=models, value=DEFAULT_MODEL, interactive=True)
    with gr.Tabs():
        # Dependency parse rendering; the three checkboxes feed `dependency`.
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")
        # Named-entity rendering restricted to the ticked labels.
        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")
        # Per-token attribute table.
        with gr.TabItem("Tokens"):
            with gr.Column():
                tok_input = gr.CheckboxGroup(
                    DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
                # NOTE(review): headers are fixed to DEFAULT_TOK_ATTR even if
                # the user ticks a subset of attributes - confirm intended.
                tok_output = gr.Dataframe(
                    headers=DEFAULT_TOK_ATTR, overflow_row_behaviour="paginate")
            tok_button = gr.Button("Generate")
        # Similarity of two randomly chosen pieces of the text. The initial
        # values are hard-coded placeholders; `vectors` overwrites all three
        # boxes on click.
        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")
        # Two user-defined phrases with labels, rendered as spans.
        with gr.TabItem("Spans"):
            with gr.Row():
                span1 = gr.Textbox(value="David Bowie", label="Span 1")
                label1 = gr.Textbox(value="Name",
                                    label="Label for Span 1")
            with gr.Row():
                span2 = gr.Textbox(value="David", label="Span 2")
                label2 = gr.Textbox(value="First",
                                    label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")

    # Wire each "Generate" button to its handler. The order of `inputs` must
    # match the handler's positional parameters.
    depen_button.click(dependency, inputs=[
                       text_input, col_punct, col_phrase, compact, model_input], outputs=depen_output)
    entity_button.click(
        entity, inputs=[text_input, entity_input, model_input], outputs=entity_output)
    tok_button.click(
        token, inputs=[text_input, tok_input, model_input], outputs=tok_output)
    # `vectors` returns (score, first_text, second_text), matching this order.
    sim_button.click(vectors, inputs=[text_input, model_input], outputs=[
                     sim_output, sim_text1, sim_text2])
    span_button.click(
        span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=span_output)

# Runs on import: there is no `if __name__ == "__main__"` guard.
demo.launch()