File size: 4,303 Bytes
ada9d7c
0e59e1d
4a39fff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ada9d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba91ea6
b0cf51d
 
eb0f569
 
ba91ea6
 
bc4505b
144e6f4
 
 
 
eb0f569
ba91ea6
 
7e60b92
 
ada9d7c
 
eb0f569
 
 
 
 
 
 
 
ada9d7c
eb0f569
4a39fff
ada9d7c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx


def render_dependency_tree(words, parents, labels):
    """Draw a dependency tree and return it as a matplotlib figure.

    Args:
        words: Token strings; the token at position ``i`` becomes node ``i``
            and is drawn with the token as its label.
        parents: For each token, the node index of its head. A value of
            ``-1`` means no incoming edge is added for that token.
        labels: For each token, the label of the edge coming from its head.

    Returns:
        The matplotlib figure containing the drawing.
    """
    figure, axes = plt.subplots(figsize=(10, 6))

    graph = nx.DiGraph()

    # One node per token, keyed by position, carrying the word as its label.
    graph.add_nodes_from(
        (index, {"label": word}) for index, word in enumerate(words)
    )

    # One labelled head -> dependent edge per token that has a head.
    graph.add_edges_from(
        (head, child, {"label": relation})
        for child, (head, relation) in enumerate(zip(parents, labels))
        if head != -1
    )

    # Node coordinates come from the Graphviz "dot" layout program.
    layout = nx.nx_agraph.graphviz_layout(graph, prog='dot')

    node_text = nx.get_node_attributes(graph, 'label')
    nx.draw(
        graph,
        layout,
        ax=axes,
        with_labels=True,
        labels=node_text,
        arrows=True,
        node_color='white',
        node_size=3000,
    )

    relation_text = nx.get_edge_attributes(graph, 'label')
    nx.draw_networkx_edge_labels(
        graph,
        layout,
        ax=axes,
        edge_labels=relation_text,
        rotate=False,
    )

    return figure


# HTML banner shown at the top of the page: a centered heading followed by a
# logo image loaded from the Hugging Face Hub.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""

# Hard-coded sample sentence: one token per line, ten tab-separated columns
# (the layout looks like CoNLL-U -- TODO confirm against the data source).
text = """1	President	President	PROPN	NNP	Number=Sing	5	nsubj	5:nsubj	_
2	Bush	Bush	PROPN	NNP	Number=Sing	1	flat	1:flat	_
3	on	on	ADP	IN	_	4	case	4:case	_
4	Tuesday	Tuesday	PROPN	NNP	Number=Sing	5	obl	5:obl:on	_
5	nominated	nominate	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
6	two	two	NUM	CD	NumType=Card	7	nummod	7:nummod	_
7	individuals	individual	NOUN	NNS	Number=Plur	5	obj	5:obj	_
8	to	to	PART	TO	_	9	mark	9:mark	_
9	replace	replace	VERB	VB	VerbForm=Inf	5	advcl	5:advcl:to	_
10	retiring	retire	VERB	VBG	VerbForm=Ger	11	amod	11:amod	_
11	jurists	jurist	NOUN	NNS	Number=Plur	9	obj	9:obj	_
12	on	on	ADP	IN	_	14	case	14:case	_
13	federal	federal	ADJ	JJ	Degree=Pos	14	amod	14:amod	_
14	courts	court	NOUN	NNS	Number=Plur	11	nmod	11:nmod:on	_
15	in	in	ADP	IN	_	18	case	18:case	_
16	the	the	DET	DT	Definite=Def|PronType=Art	18	det	18:det	_
17	Washington	Washington	PROPN	NNP	Number=Sing	18	compound	18:compound	_
18	area	area	NOUN	NN	Number=Sing	14	nmod	14:nmod:in	SpaceAfter=No
19	.	.	PUNCT	.	_	5	punct	5:punct	_"""

# Split each non-empty line that does not start with "#" into its
# tab-separated fields once; every list below is one column of that table.
_token_rows = [
    line.split("\t")
    for line in text.split("\n")
    if line and not line.startswith("#")
]

forms = [fields[1] for fields in _token_rows]
lemmas = [fields[2] for fields in _token_rows]
upos = [fields[3] for fields in _token_rows]
xpos = [fields[4] for fields in _token_rows]
feats = [fields[5] for fields in _token_rows]
metadata = [fields[9] for fields in _token_rows]

# Column 6 is kept as the integer written in the file (no re-indexing here).
edges = [int(fields[6]) for fields in _token_rows]
edge_labels = [fields[7] for fields in _token_rows]

def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
    """Build the ``value``/``headers`` arguments for the token table.

    The table has one column per token (plus a leading label column). Its
    rows are the lemmas, the UPOS tags, the XPOS tags and then one row per
    morphological feature slot; tokens with fewer features than the longest
    feature list get empty cells.

    Args:
        forms: Word forms; used as the column headers.
        lemmas: Lemma of each token.
        upos: UPOS tag of each token.
        xpos: XPOS tag of each token.
        feats: Feature string of each token, ``Name=Value`` pairs joined by
            ``|``. Pieces without ``=`` (such as ``_``) are ignored.
        metadata: Unused; accepted so callers can pass every column.
        edges: Unused; accepted so callers can pass every column.
        edge_labels: Unused; accepted so callers can pass every column.

    Returns:
        A dict with ``"value"`` (list of table rows) and ``"headers"`` (list
        of column headers). Labels use ``*...*`` markup, so the table is
        meant to be shown with a markdown datatype.
    """
    # Turn each feature string into a list of "*Name:* Value" cells.
    feat_cells = []
    for feat in feats:
        cells = []
        for pair in feat.split("|"):
            # partition() splits on the first "=" only, so a value that
            # itself contains "=" is kept whole instead of being truncated.
            name, separator, value = pair.partition("=")
            if separator:
                cells.append(f"*{name}:* {value}")
        feat_cells.append(cells)

    # Always emit at least one feature row so the "*UFEATS:*" label is shown,
    # even when no token has features or there are no tokens at all.
    row_count = max(1, max((len(cells) for cells in feat_cells), default=0))

    # Transpose: row i holds the i-th feature of every token, "" if absent.
    feat_rows = [
        [cells[i] if i < len(cells) else "" for cells in feat_cells]
        for i in range(row_count)
    ]

    headers = [""] + list(forms)
    rows = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feat_rows[0],
    ]
    rows.extend([""] + row for row in feat_rows[1:])

    return {"value": rows, "headers": headers}


# Extra stylesheet passed to gr.Blocks; it hides the elements carrying the
# "sort-button" class.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""
# Assemble the page: banner, token table, dependency-tree plot.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)
    gr.DataFrame(
        **render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels),
        interactive=False,
        datatype="markdown",
    )

    # The head column of the sample numbers tokens from 1 and uses 0 for the
    # root, whereas render_dependency_tree numbers its nodes from 0 and skips
    # parents equal to -1. Shift by one so every arc starts at the actual
    # head token and the root receives no incoming edge.
    parents = [head - 1 for head in edges]
    gr.Plot(render_dependency_tree(forms, parents, edge_labels), interactive=False)

demo.launch()