Spaces:

ltg
/

nor-ud

Sleeping

nor-ud / app.py

davda54

try with DataFrame

7e60b92 over 1 year ago

3.26 kB

	import gradio as gr
	import tabulate


	# HTML banner rendered at the top of the demo page.
	description = """
	<div style="text-align: center;">
	<h1>Norsk UD (Bokmål og Nynorsk)</h1>
	<p align="center">
	<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
	</p><p></p>
	</div>
	"""

	# Sample sentence in a CoNLL-U-style layout: one token per line, ten fields
	# per token. Written as a raw string so the backslash in front of each `|`
	# stays in the value without being an invalid escape sequence.
	# NOTE(review): the backslash-escaped pipes look like an artifact of an
	# earlier conversion; confirm whether the features should use a plain `|`.
	text = r"""1 President President PROPN NNP Number=Sing 5 nsubj 5:nsubj _
	2 Bush Bush PROPN NNP Number=Sing 1 flat 1:flat _
	3 on on ADP IN _ 4 case 4:case _
	4 Tuesday Tuesday PROPN NNP Number=Sing 5 obl 5:obl:on _
	5 nominated nominate VERB VBD Mood=Ind\|Number=Sing\|Person=3\|Tense=Past\|VerbForm=Fin 0 root 0:root _
	6 two two NUM CD NumType=Card 7 nummod 7:nummod _
	7 individuals individual NOUN NNS Number=Plur 5 obj 5:obj _
	8 to to PART TO _ 9 mark 9:mark _
	9 replace replace VERB VB VerbForm=Inf 5 advcl 5:advcl:to _
	10 retiring retire VERB VBG VerbForm=Ger 11 amod 11:amod _
	11 jurists jurist NOUN NNS Number=Plur 9 obj 9:obj _
	12 on on ADP IN _ 14 case 14:case _
	13 federal federal ADJ JJ Degree=Pos 14 amod 14:amod _
	14 courts court NOUN NNS Number=Plur 11 nmod 11:nmod:on _
	15 in in ADP IN _ 18 case 18:case _
	16 the the DET DT Definite=Def\|PronType=Art 18 det 18:det _
	17 Washington Washington PROPN NNP Number=Sing 18 compound 18:compound _
	18 area area NOUN NN Number=Sing 14 nmod 14:nmod:in SpaceAfter=No
	19 . . PUNCT . _ 5 punct 5:punct _"""


	def _split_fields(line):
	    """Split one token line into its fields.

	    Tabs are used as the separator when the line contains any; otherwise the
	    line is split on runs of whitespace, so a sample whose tabs were
	    converted to spaces still parses instead of raising IndexError.
	    """
	    stripped = line.strip()
	    return stripped.split("\t") if "\t" in stripped else stripped.split()


	# Parse the sample once; blank lines and `#` comment lines are skipped.
	# Lines are stripped first so stray indentation does not shift the fields.
	_rows = [
	    _split_fields(line)
	    for line in map(str.strip, text.split("\n"))
	    if line and not line.startswith("#")
	]

	# Per-token field lists, indexed by field position within each line.
	forms = [row[1] for row in _rows]
	lemmas = [row[2] for row in _rows]
	upos = [row[3] for row in _rows]
	xpos = [row[4] for row in _rows]
	feats = [row[5] for row in _rows]
	metadata = [row[9] for row in _rows]

	# Field 6 is the only one converted to int.
	edges = [int(row[6]) for row in _rows]
	edge_labels = [row[7] for row in _rows]

	def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
	    """Arrange per-token annotations as a transposed table, one column per token.

	    The first cell of every row is an HTML label. The forms row becomes the
	    table header; lemmas, upos and xpos follow as one row each. Each token's
	    feature string is split into individual features that are stacked
	    vertically, so the table gets as many feature rows as the longest
	    feature list, with shorter lists padded by empty strings.

	    Args:
	        forms: Word forms, one per token.
	        lemmas: Lemmas, one per token.
	        upos: Universal POS tags, one per token.
	        xpos: Language-specific POS tags, one per token.
	        feats: Feature strings, one per token; features are separated by a
	            pipe, optionally preceded by a backslash.
	        metadata: Accepted for interface compatibility; currently unused.
	        edges: Accepted for interface compatibility; currently unused.
	        edge_labels: Accepted for interface compatibility; currently unused.

	    Returns:
	        A dict with "headers" (the forms row) and "value" (the remaining
	        rows), suitable for unpacking into a table component.
	    """
	    # Normalise the backslash-escaped separator to a plain pipe before
	    # splitting, so both `A\|B` and the plain `A|B` yield ["A", "B"].
	    split_feats = [feat.replace("\\|", "|").split("|") for feat in feats]

	    # `default=0` keeps an empty token list from raising in max().
	    depth = max((len(parts) for parts in split_feats), default=0)

	    # Transpose to one row per feature position, padding short lists with "".
	    # At least one row is always produced so the feats label is present.
	    feat_rows = [
	        [parts[level] if level < len(parts) else "" for parts in split_feats]
	        for level in range(max(depth, 1))
	    ]
	    first_feats, *more_feats = feat_rows

	    headers = ["<b>forms</b>     "] + list(forms)
	    rows = [
	        ["<b>lemmas</b>  &nbsp  "] + list(lemmas),
	        ["<b>upos</b>     "] + list(upos),
	        ["<b>xpos</b>     "] + list(xpos),
	        ["<b>feats</b>     "] + first_feats,
	        # Only the first feature row carries the label.
	        *([""] + row for row in more_feats),
	    ]

	    return {"value": rows, "headers": headers}


	# Build the table arguments ("value" and "headers") before declaring the
	# layout, so the Blocks context below only instantiates components.
	_table_kwargs = render_table(
	    forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels
	)

	# Page layout: the HTML banner followed by a read-only table.
	with gr.Blocks(theme='sudeepshouche/minimalist') as demo:
	    gr.HTML(description)
	    gr.DataFrame(interactive=False, **_table_kwargs)

	# Start serving the app once the layout has been assembled.
	demo.launch()