File size: 7,439 Bytes
ada9d7c
0e59e1d
4a39fff
 
 
55f9b9d
 
 
 
 
 
 
 
be30936
05373fd
55f9b9d
 
 
4a39fff
 
aeabbe6
4a39fff
 
 
 
 
 
 
 
 
 
8f83e8d
 
4a39fff
 
 
 
 
 
be2dc5c
d74c2a3
4a39fff
 
 
ef16d31
4a39fff
 
ada9d7c
 
 
 
 
 
 
 
 
 
 
00d0840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ada9d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00d0840
0a00d43
ada9d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d572f3
 
55f9b9d
b0cf51d
 
eb0f569
 
ba91ea6
be30936
2559909
 
be30936
 
 
2559909
be30936
2559909
be30936
55f9b9d
ba91ea6
bc4505b
144e6f4
 
 
 
93bb100
be30936
ba91ea6
 
7e60b92
 
ada9d7c
 
eb0f569
 
 
 
 
 
 
 
ada9d7c
6810129
 
55f9b9d
6810129
14600a6
6810129
 
14600a6
55f9b9d
6810129
 
 
3293495
 
 
 
6810129
 
 
aeabbe6
 
 
 
ada9d7c
55f9b9d
cc397ec
55f9b9d
 
cc397ec
55f9b9d
 
3293495
55f9b9d
cc397ec
55f9b9d
 
 
242791e
ada9d7c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx

from model import Parser


# Single module-level Parser instance, created at import time and reused by
# every call to parse() below.
parser = Parser()

def parse(text):
    """Parse `text` and build both visualisations shown in the UI.

    The module-level `parser` is run on `text`; its output is read as a
    mapping with the keys "forms", "heads", "deprel", "lemmas", "upos",
    "xpos", "feats" and "ne".

    Returns:
        A `(dependency_tree, table)` pair: the value returned by
        `render_dependency_tree` followed by the value returned by
        `render_table`.
    """
    result = parser.parse(text)

    figure = render_dependency_tree(
        result["forms"], result["heads"], result["deprel"]
    )

    # Same column order as the positional parameters of render_table.
    table_columns = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    grid = render_table(*[result[column] for column in table_columns])

    return figure, grid


def render_dependency_tree(words, parents, labels):
    """Draw the dependency tree of one sentence and return the figure.

    Args:
        words: token strings in sentence order; each becomes a node label.
        parents: for each token, the 1-based position of its head token.
            A value of 0 means the token gets no incoming edge.
        labels: for each token, the text drawn on the edge coming from its
            head.

    Returns:
        The matplotlib figure containing the drawing. The figure is
        deregistered from pyplot before it is returned, so repeated calls
        do not accumulate open figures; the returned object can still be
        rendered or saved.
    """
    fig, ax = plt.subplots(figsize=(40, 16))

    try:
        # Directed graph with one node per token, keyed by 0-based position.
        graph = nx.DiGraph()
        for index, word in enumerate(words):
            graph.add_node(index, label=word)

        # One edge head -> dependent per token; heads are 1-based, nodes 0-based.
        for index, (parent, label) in enumerate(zip(parents, labels)):
            if parent != 0:
                graph.add_edge(parent - 1, index, label=label)

        # Position nodes using Graphviz
        pos = nx.nx_agraph.graphviz_layout(graph, prog='dot')

        # Nodes are drawn as text boxes only (node_size=0 hides the marker).
        nx.draw(
            graph, pos, ax=ax, with_labels=True,
            labels=nx.get_node_attributes(graph, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s',
            font_size=24, bbox=dict(facecolor="white", pad=10),
        )

        # Draw edge labels
        edge_captions = nx.get_edge_attributes(graph, 'label')
        nx.draw_networkx_edge_labels(
            graph, pos, ax=ax, edge_labels=edge_captions, rotate=False,
            alpha=0.9, font_size=18,
        )
    finally:
        # plt.subplots registers the figure in pyplot's global figure list;
        # without closing it, every request would leave one more figure alive.
        plt.close(fig)

    return fig


# HTML header for the page (title and logo image); rendered through gr.HTML
# inside the Blocks layout.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""

# Sample sentence, one token per line with tab-separated fields. The code
# below reads these field positions: 1 = form, 2 = lemma, 3 = UPOS, 4 = XPOS,
# 5 = features, 6 = head index, 7 = dependency label, 9 = last column, which
# carries the entity tag as `name=<tag>`.
text = """1	Forretten	forrett	NOUN	_	Definite=Def|Gender=Masc|Number=Sing	2	nsubj	_	name=O
2	lyder	lyde	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	name=O
3	navnet	navn	NOUN	_	Definite=Def|Gender=Neut|Number=Sing	5	nmod	_	name=O
4	"	$"	PUNCT	_	_	5	punct	_	SpaceAfter=No|name=O
5	Coquilles	Coquilles	PROPN	_	_	2	obj	_	name=B-PROD
6	St.	St.	PROPN	_	_	5	flat:name	_	name=I-PROD
7	Jacques	Jacques	PROPN	_	_	5	flat:name	_	name=I-PROD
8	Prince	Prince	PROPN	_	_	5	flat:name	_	name=I-PROD
9	de	de	X	_	_	5	flat:name	_	name=I-PROD
10	Norvege	Norvege	PROPN	_	_	5	flat:name	_	SpaceAfter=No|name=I-PROD
11	"	$"	PUNCT	_	_	5	punct	_	SpaceAfter=No|name=O
12	,	$,	PUNCT	_	_	5	punct	_	name=O
13	som	som	PRON	_	PronType=Rel	16	nsubj	_	name=O
14	er	være	AUX	_	Mood=Ind|Tense=Pres|VerbForm=Fin	16	cop	_	name=O
15	grillet	grille	ADJ	_	Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part	16	amod	_	name=O
16	kamskjell	kamskjell	NOUN	_	Definite=Ind|Gender=Neut|Number=Sing	5	acl:relcl	_	name=O
17	på	på	ADP	_	_	19	case	_	name=O
18	norsk	norsk	ADJ	_	Definite=Ind|Degree=Pos|Number=Sing	19	amod	_	name=O
19	spekeskinke	spekeskinke	NOUN	_	Definite=Ind|Gender=Fem|Number=Sing	16	nmod	_	name=O
20	-	$-	PUNCT	_	_	16	punct	_	name=O
21	med	med	ADP	_	_	22	case	_	name=O
22	trøffelhonningvinaigrette	trøffelhonningvinaigrette	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	16	nmod	_	SpaceAfter=No|name=O
23	,	$,	PUNCT	_	_	22	punct	_	name=O
24	ruccolasalat	ruccolasalat	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	22	conj	_	name=O
25	og	og	CCONJ	_	_	27	cc	_	name=O
26	ristede	riste	ADJ	_	Number=Plur|VerbForm=Part	27	amod	_	name=O
27	gresskarkjerner	gresskarkjerne	NOUN	_	Definite=Ind|Gender=Fem|Number=Plur	22	conj	_	SpaceAfter=No|name=O
28	.	$.	PUNCT	_	_	2	punct	_	name=O"""

# Split the sample once: one list of tab-separated fields per line of `text`
# that is neither empty nor a `#` comment line.
_sample_rows = [
    raw_line.split("\t")
    for raw_line in text.split("\n")
    if raw_line and not raw_line.startswith("#")
]

# Per-token columns of the sample, used to pre-fill the table and the plot.
forms = [fields[1] for fields in _sample_rows]
lemmas = [fields[2] for fields in _sample_rows]
upos = [fields[3] for fields in _sample_rows]
xpos = [fields[4] for fields in _sample_rows]
feats = [fields[5] for fields in _sample_rows]
# Keep only what follows the last "name=" in the final column.
ne = [fields[9].split('name=')[-1] for fields in _sample_rows]
# Head indices are stored as integers.
edges = [int(fields[6]) for fields in _sample_rows]

edge_labels = [fields[7] for fields in _sample_rows]

print(ne, flush=True)

def _format_feats(feat):
    """Turn one `Key=Value|Key=Value` string into a list of markdown cells."""
    cells = []
    for item in feat.split("|"):
        # partition keeps everything after the first "=" as the value;
        # items without "=" (such as the "_" placeholder) are skipped.
        name, separator, value = item.partition("=")
        if separator:
            cells.append(f"*{name}:* {value}")
    return cells


def _format_named_entities(named_entities):
    """Convert per-token entity tags into `<< TYPE`, `TYPE`, `TYPE >>` markers."""
    converted = []
    for i, tag in enumerate(named_entities):
        _, separator, entity_type = tag.partition("-")
        if not separator:
            # "O", "_", "" and any other tag without a type: nothing to show.
            converted.append("")
            continue

        continues = i + 1 < len(named_entities) and named_entities[i + 1].startswith("I")
        if tag.startswith("B"):
            # A span that is not continued by the next token is closed at once,
            # so single-token entities do not leave a dangling "<<".
            converted.append(f"<< {entity_type}" if continues else f"<< {entity_type} >>")
        elif tag.startswith("I") and continues:
            converted.append(entity_type)
        else:
            converted.append(f"{entity_type} >>")
    return converted


def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the per-token annotation table.

    Args:
        forms: token strings; they become the column headers.
        lemmas: one lemma per token.
        upos: one UPOS tag per token.
        xpos: one XPOS tag per token.
        feats: one `Key=Value|Key=Value` feature string per token; items
            without `=` are ignored.
        named_entities: one entity tag per token (`O`, `B-TYPE`, `I-TYPE`, ...).
            Tags without a `-` produce an empty cell.

    Returns:
        A dict with "headers" (an empty corner cell followed by the forms)
        and "value" (the rows: lemmas, UPOS, XPOS, one row per feature slot,
        and the entity row). Every row starts with its row label. Empty
        input yields rows that contain only their label.
    """
    feature_cells = [_format_feats(feat) for feat in feats]

    # At least one feature row is always emitted, even with no features at all.
    depth = max(1, max((len(cells) for cells in feature_cells), default=0))
    feature_rows = [
        [cells[row] if row < len(cells) else "" for cells in feature_cells]
        for row in range(depth)
    ]

    headers = [""] + list(forms)
    value = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feature_rows[0],
        *([""] + row for row in feature_rows[1:]),
        ["*NE:*"] + _format_named_entities(named_entities),
    ]

    return {"value": value, "headers": headers}


# Extra stylesheet passed to gr.Blocks through its `css` argument.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""
# Page layout and event wiring for the demo.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        # First panel: free-text input and the submit button.
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence", placeholder="Write a sentence to parse", show_label=False, lines=1, max_lines=5, autofocus=True
            )
            submit = gr.Button("Submit", variant="primary")

        # Second panel: clickable example sentences.
        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."]
                ]
            )

    # Results area, initially filled from the module-level sample columns.
    with gr.Column(scale=1, variant="panel"):
        gr.Label("", show_label=False, container=False)
        table = gr.DataFrame(**render_table(forms, lemmas, upos, xpos, feats, ne), interactive=False, datatype="markdown")
        dependency_plot = gr.Plot(render_dependency_tree(forms, edges, edge_labels), container=False)

    # Submitting the textbox and clicking the button both run parse() on the
    # textbox content and refresh the plot and the table.
    source.submit(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    submit.click(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    # Clicking an example first copies its sentence into the textbox, then
    # parses it like a normal submission.
    dataset.click(
        fn=lambda sample: sample[0], inputs=[dataset], outputs=[source]
    ).then(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )


# Configure the app's queue with max_size=32, then start serving the app.
demo.queue(max_size=32)
demo.launch()