Spaces:

ltg
/

nor-ud

Build error

File size: 3,303 Bytes

import gradio as gr
import tabulate


# HTML fragment shown at the top of the page: a centered heading followed by
# a small logo image loaded from a remote URL.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""

# Sample sentence in a tab-separated, CoNLL-U-style layout: one token per
# line with ten columns (index, form, lemma, UPOS, XPOS, features, head,
# relation, enhanced dependencies, misc).
text = """1	President	President	PROPN	NNP	Number=Sing	5	nsubj	5:nsubj	_
2	Bush	Bush	PROPN	NNP	Number=Sing	1	flat	1:flat	_
3	on	on	ADP	IN	_	4	case	4:case	_
4	Tuesday	Tuesday	PROPN	NNP	Number=Sing	5	obl	5:obl:on	_
5	nominated	nominate	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
6	two	two	NUM	CD	NumType=Card	7	nummod	7:nummod	_
7	individuals	individual	NOUN	NNS	Number=Plur	5	obj	5:obj	_
8	to	to	PART	TO	_	9	mark	9:mark	_
9	replace	replace	VERB	VB	VerbForm=Inf	5	advcl	5:advcl:to	_
10	retiring	retire	VERB	VBG	VerbForm=Ger	11	amod	11:amod	_
11	jurists	jurist	NOUN	NNS	Number=Plur	9	obj	9:obj	_
12	on	on	ADP	IN	_	14	case	14:case	_
13	federal	federal	ADJ	JJ	Degree=Pos	14	amod	14:amod	_
14	courts	court	NOUN	NNS	Number=Plur	11	nmod	11:nmod:on	_
15	in	in	ADP	IN	_	18	case	18:case	_
16	the	the	DET	DT	Definite=Def|PronType=Art	18	det	18:det	_
17	Washington	Washington	PROPN	NNP	Number=Sing	18	compound	18:compound	_
18	area	area	NOUN	NN	Number=Sing	14	nmod	14:nmod:in	SpaceAfter=No
19	.	.	PUNCT	.	_	5	punct	5:punct	_"""

# Split every token line into its columns exactly once. Blank lines and
# "#" comment lines are skipped. All per-column lists below are derived from
# this table, so the filtering rule lives in a single place instead of being
# repeated for each column.
_rows = [
    line.split("\t")
    for line in text.split("\n")
    if line and not line.startswith("#")
]

forms = [row[1] for row in _rows]
lemmas = [row[2] for row in _rows]
upos = [row[3] for row in _rows]
xpos = [row[4] for row in _rows]
feats = [row[5] for row in _rows]
metadata = [row[9] for row in _rows]

# Head index of every token, converted to int (the root token of the sample
# carries head 0).
edges = [int(row[6]) for row in _rows]
edge_labels = [row[7] for row in _rows]

def _format_feats(feat):
    """Turn one "Name=Value|Name=Value" string into markdown cell strings."""
    cells = []
    for item in feat.split("|"):
        # partition splits on the first "=" only, so a value that itself
        # contains "=" is kept whole; items without "=" (e.g. "_") are skipped.
        name, sep, value = item.partition("=")
        if sep:
            cells.append(f"*{name}:* {value}")
    return cells


def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
    """Arrange per-token annotations as a table with one column per token.

    The first column holds row labels. Morphological features are spread
    over as many rows as the richest token needs (always at least one);
    tokens with fewer features are padded with empty cells.

    Args:
        forms: Word forms; used as the column headers.
        lemmas: Lemma of each token.
        upos: Universal POS tag of each token.
        xpos: Language-specific POS tag of each token.
        feats: Feature string of each token, "Name=Value" pairs joined by "|".
        metadata: Accepted for interface compatibility; not used here.
        edges: Accepted for interface compatibility; not used here.
        edge_labels: Accepted for interface compatibility; not used here.

    Returns:
        A dict with "headers" (a list: an empty corner cell followed by the
        forms) and "value" (a list of rows, each a list of markdown strings).
        Empty input yields label-only rows instead of raising.
    """
    cells_per_token = [_format_feats(feat) for feat in feats]

    # At least one feature row is always emitted so the "*UFEATS:*" label has
    # a row to live on; default=0 keeps max() from failing on empty input.
    depth = max(1, max((len(cells) for cells in cells_per_token), default=0))

    # Transpose token-major cells into row-major rows, padding short tokens.
    feat_rows = [
        [cells[i] if i < len(cells) else "" for cells in cells_per_token]
        for i in range(depth)
    ]
    feat_labels = ["*UFEATS:*"] + [""] * (depth - 1)

    value = [
        ["*LEMMAS:*", *lemmas],
        ["*UPOS:*", *upos],
        ["*XPOS:*", *xpos],
        *([label] + row for label, row in zip(feat_labels, feat_rows)),
    ]
    return {"value": value, "headers": ["", *forms]}


# Extra stylesheet handed to the page; its single rule hides every element
# carrying the "sort-button" class.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""

# Page layout: the HTML banner on top, followed by a read-only table whose
# cells are rendered as markdown.
with gr.Blocks(css=custom_css, theme='sudeepshouche/minimalist') as demo:
    gr.HTML(description)
    table_spec = render_table(
        forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels
    )
    gr.DataFrame(
        value=table_spec["value"],
        headers=table_spec["headers"],
        interactive=False,
        datatype="markdown",
    )

demo.launch()