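# --- file-viewer page header captured together with the source (not part of the program) ---
# nor-ud / app.py
# davda54
# try with DataFrame
# 7e60b92
# raw
# history blame
# 3.26 kB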
import gradio as gr
import tabulate
# HTML banner shown at the top of the page: a centred heading followed by a
# small logo image. The string is handed unchanged to gr.HTML further down.
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
# Hard-coded sample parse: one token per line, ten fields per token. The
# comprehensions below split each line on "\t" and read fields 1-7 and 9.
# The layout looks like CoNLL-U (ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL
# DEPS MISC) -- TODO confirm.
# NOTE(review): the parser expects tab characters between fields; verify the
# separators in this literal are real tabs and were not flattened to spaces.
# NOTE(review): the sentence is English although the page heading says
# "Norsk UD" -- presumably placeholder data.
text = """1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _
2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _
3 on on ADP IN _ 4 case 4:case _
4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _
5 nominated nominate VERB VBD Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin 0 root 0:root _
6 two two NUM CD NumType=Card 7 nummod 7:nummod _
7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _
8 to to PART TO _ 9 mark 9:mark _
9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _
10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _
11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _
12 on on ADP IN _ 14 case 14:case _
13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _
14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _
15 in in ADP IN _ 18 case 18:case _
16 the the DET DT Definite=Def|PronType=Art 18 det 18:det _
17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _
18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No
19 . . PUNCT . _ 5 punct 5:punct _"""
# Number of fields every token line must provide (the highest index read
# below is 9).
_NUM_FIELDS = 10


def _parse_token_rows(sample):
    """Split the token lines of *sample* into lists of fields.

    Blank lines and lines starting with "#" are skipped. Fields are expected
    to be tab-separated; when a line does not yield enough fields that way
    (tabs are easily flattened to spaces by copy-paste), it is re-split on
    arbitrary whitespace instead.

    Raises:
        ValueError: if a token line still has fewer than ``_NUM_FIELDS``
            fields after the whitespace fallback.
    """
    rows = []
    for line in sample.split("\n"):
        if not line.strip() or line.startswith("#"):
            continue
        fields = line.split("\t")
        if len(fields) < _NUM_FIELDS:
            # Only used when the tab split fails, so well-formed
            # tab-separated input is parsed exactly as before.
            fields = line.split()
        if len(fields) < _NUM_FIELDS:
            raise ValueError(
                f"expected {_NUM_FIELDS} fields per token line, "
                f"got {len(fields)}: {line!r}"
            )
        rows.append(fields)
    return rows


# Parse the sample once; every column list below is a view of the same rows.
_token_rows = _parse_token_rows(text)

forms = [row[1] for row in _token_rows]
lemmas = [row[2] for row in _token_rows]
upos = [row[3] for row in _token_rows]
xpos = [row[4] for row in _token_rows]
feats = [row[5] for row in _token_rows]
metadata = [row[9] for row in _token_rows]
# Field 6 is converted to int; field 7 is the label that goes with it.
edges = [int(row[6]) for row in _token_rows]
edge_labels = [row[7] for row in _token_rows]
def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
    """Lay the token annotations out as a table with one column per token.

    The first column holds a bold HTML row label. Each entry of *feats* is
    split on "|" and its parts are stacked vertically: the first part sits on
    the labelled "feats" row, further parts on unlabelled rows below it, and
    tokens with fewer parts are padded with empty cells.

    Args:
        forms, lemmas, upos, xpos: per-token strings, one cell per token.
        feats: per-token feature strings, "|"-separated.
        metadata, edges, edge_labels: accepted for interface compatibility
            but not used by the table.

    Returns:
        A dict with "headers" (the forms row) and "value" (all remaining
        rows), shaped so it can be unpacked as keyword arguments.
    """
    label_pad = "&nbsp;" * 5

    def labelled(name, cells):
        # Accept any iterable of cells, not just lists.
        return [f"<b>{name}</b>{label_pad}", *cells]

    split_feats = [feat.split("|") for feat in feats]
    # default=0 keeps an empty token list from raising in max().
    depth = max((len(parts) for parts in split_feats), default=0)
    padded = [parts + [""] * (depth - len(parts)) for parts in split_feats]
    # Transpose token-major -> row-major. With no tokens there is nothing to
    # transpose, so keep one empty row for the labelled "feats" line.
    feat_rows = list(zip(*padded)) or [()]

    table = [
        labelled("forms", forms),
        labelled("lemmas", lemmas),
        labelled("upos", upos),
        labelled("xpos", xpos),
        labelled("feats", feat_rows[0]),
        *(["", *row] for row in feat_rows[1:]),
    ]
    return {"value": table[1:], "headers": table[0]}
# Assemble the page: the HTML banner on top, the read-only annotation table
# underneath, then start serving.
with gr.Blocks(theme='sudeepshouche/minimalist') as demo:
    gr.HTML(description)
    _table_kwargs = render_table(
        forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels
    )
    gr.DataFrame(interactive=False, **_table_kwargs)

demo.launch()