File size: 5,284 Bytes
ada9d7c
0e59e1d
4a39fff
 
 
 
 
14600a6
4a39fff
 
 
 
 
 
 
 
 
 
8f83e8d
 
4a39fff
 
 
 
 
 
be2dc5c
d74c2a3
4a39fff
 
 
ef16d31
4a39fff
 
ada9d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba91ea6
b0cf51d
 
eb0f569
 
ba91ea6
 
bc4505b
144e6f4
 
 
 
eb0f569
ba91ea6
 
7e60b92
 
ada9d7c
 
eb0f569
 
 
 
 
 
 
 
ada9d7c
6810129
 
 
 
14600a6
6810129
 
14600a6
6810129
 
 
 
 
14600a6
 
 
6810129
 
 
14600a6
 
ada9d7c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx


def render_dependency_tree(words, parents, labels):
    """Draw a labelled dependency tree and return the matplotlib figure.

    Args:
        words: Token strings; token ``i`` becomes graph node ``i`` and is
            displayed using its string as the node label.
        parents: One head index per token. A value of ``0`` produces no
            incoming edge; any other value ``p`` produces an edge from node
            ``p - 1`` to the token's own node.
        labels: One edge label per token, attached to the edge coming from
            its head.

    Returns:
        The figure the tree was drawn on.
    """
    figure, axes = plt.subplots(figsize=(32, 16))

    graph = nx.DiGraph()

    # One node per token, keyed by its position in the sentence.
    for position, token in enumerate(words):
        graph.add_node(position, label=token)

    # One edge per token that has a head; a head index of 0 is skipped.
    for child, (head, relation) in enumerate(zip(parents, labels)):
        if head == 0:
            continue
        graph.add_edge(head - 1, child, label=relation)

    # Node coordinates come from Graphviz's "dot" layout program.
    layout = nx.nx_agraph.graphviz_layout(graph, prog='dot')

    # node_size=0 hides the node markers, so only the boxed text labels
    # (white bbox) are visible.
    node_text = nx.get_node_attributes(graph, 'label')
    label_box = {"facecolor": "white", "pad": 10}
    nx.draw(
        graph,
        layout,
        ax=axes,
        with_labels=True,
        labels=node_text,
        arrows=True,
        node_color='#ffffff',
        node_size=0,
        node_shape='s',
        font_size=24,
        bbox=label_box,
    )

    relation_text = nx.get_edge_attributes(graph, 'label')
    nx.draw_networkx_edge_labels(
        graph,
        layout,
        ax=axes,
        edge_labels=relation_text,
        rotate=False,
        alpha=0.9,
        font_size=18,
    )

    return figure


# HTML banner passed to gr.HTML at the top of the page: a centered title
# followed by a logo image.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""

# Sample parsed sentence used to populate the table and the tree when the
# page is built. One token per line, ten tab-separated columns (the layout
# looks like CoNLL-U). The code below reads columns 1-7 and 9 by index.
text = """1	President	President	PROPN	NNP	Number=Sing	5	nsubj	5:nsubj	_
2	Bush	Bush	PROPN	NNP	Number=Sing	1	flat	1:flat	_
3	on	on	ADP	IN	_	4	case	4:case	_
4	Tuesday	Tuesday	PROPN	NNP	Number=Sing	5	obl	5:obl:on	_
5	nominated	nominate	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
6	two	two	NUM	CD	NumType=Card	7	nummod	7:nummod	_
7	individuals	individual	NOUN	NNS	Number=Plur	5	obj	5:obj	_
8	to	to	PART	TO	_	9	mark	9:mark	_
9	replace	replace	VERB	VB	VerbForm=Inf	5	advcl	5:advcl:to	_
10	retiring	retire	VERB	VBG	VerbForm=Ger	11	amod	11:amod	_
11	jurists	jurist	NOUN	NNS	Number=Plur	9	obj	9:obj	_
12	on	on	ADP	IN	_	14	case	14:case	_
13	federal	federal	ADJ	JJ	Degree=Pos	14	amod	14:amod	_
14	courts	court	NOUN	NNS	Number=Plur	11	nmod	11:nmod:on	_
15	in	in	ADP	IN	_	18	case	18:case	_
16	the	the	DET	DT	Definite=Def|PronType=Art	18	det	18:det	_
17	Washington	Washington	PROPN	NNP	Number=Sing	18	compound	18:compound	_
18	area	area	NOUN	NN	Number=Sing	14	nmod	14:nmod:in	SpaceAfter=No
19	.	.	PUNCT	.	_	5	punct	5:punct	_"""

# Split the sample into rows exactly once: skip empty lines and "#" comment
# lines, then break each remaining line into its tab-separated columns.
# Every list below is a projection of one column of these rows.
_rows = [
    line.split("\t")
    for line in text.split("\n")
    if line and not line.startswith("#")
]

# Column 1: surface form of each token.
forms = [row[1] for row in _rows]

# Column 2: lemma.
lemmas = [row[2] for row in _rows]

# Columns 3 and 4: the two part-of-speech tag columns.
upos = [row[3] for row in _rows]
xpos = [row[4] for row in _rows]

# Column 5: "Key=Value|Key=Value" feature string, or "_" when empty.
feats = [row[5] for row in _rows]

# Column 9: the last column of each row (e.g. "SpaceAfter=No" or "_").
metadata = [row[9] for row in _rows]

# Column 6: head index as an int. render_dependency_tree treats 0 as
# "no parent" and subtracts 1 from any other value to get the node index.
edges = [int(row[6]) for row in _rows]

# Column 7: label of the edge between a token and its head.
edge_labels = [row[7] for row in _rows]

def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
    """Build the ``value``/``headers`` keyword arguments for the token table.

    The table has one column per token (plus a leading caption column). The
    header row holds the token forms; the rows below hold lemmas, the two
    part-of-speech columns, and then one row per feature slot.

    Args:
        forms: Token strings, used as the column headers.
        lemmas: One lemma per token.
        upos: One tag per token for the ``*UPOS:*`` row.
        xpos: One tag per token for the ``*XPOS:*`` row.
        feats: One feature string per token, with ``Key=Value`` items
            separated by ``|``. Items without ``=`` (such as ``_``) are
            ignored.
        metadata: Accepted for signature compatibility; not used here.
        edges: Accepted for signature compatibility; not used here.
        edge_labels: Accepted for signature compatibility; not used here.

    Returns:
        A dict with ``"headers"`` (the caption cell followed by the forms)
        and ``"value"`` (the remaining rows as lists of strings). With no
        tokens, every row contains only its caption cell.
    """
    # Convert each token's feature string into markdown cells. partition()
    # splits on the first "=" only, so a value containing "=" is kept whole.
    per_token = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            key, separator, value = item.partition("=")
            if separator:
                cells.append(f"*{key}:* {value}")
        per_token.append(cells)

    # Always emit at least one feature row so the "*UFEATS:*" caption shows;
    # default=0 keeps max() from raising when there are no tokens at all.
    depth = max(1, max((len(cells) for cells in per_token), default=0))

    # Row r holds the r-th feature of every token; tokens with fewer
    # features get an empty cell.
    feat_rows = [
        [cells[r] if r < len(cells) else "" for cells in per_token]
        for r in range(depth)
    ]

    headers = [""] + list(forms)
    rows = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feat_rows[0],
    ]
    # Only the first feature row carries the caption.
    rows.extend([""] + row for row in feat_rows[1:])

    return {"value": rows, "headers": headers}


# Extra CSS handed to gr.Blocks; the single rule hides the elements with
# the "sort-button" class.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""

# Page layout: banner, then a row with the input box + submit button next to
# a list of example sentences, then the token table and the dependency plot.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=1):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ],
            )

    # NOTE(review): no event handlers are attached to `submit` or `dataset`
    # in this part of the file, so the table and plot below only ever show
    # the pre-parsed sample sentence — confirm whether wiring is still to come.
    table = gr.DataFrame(
        **render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels),
        interactive=False,
        datatype="markdown",
    )
    dependency_plot = gr.Plot(
        render_dependency_tree(forms, edges, edge_labels),
        container=False,
    )

demo.launch()