# Hugging Face Space: ltg/nor-ud (status: Running)
# File size: 5,284 bytes

import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx


def render_dependency_tree(words, parents, labels):
    """Draw a labelled dependency tree and return the matplotlib figure.

    Args:
        words: Token strings; the token at position ``i`` becomes node ``i``.
        parents: Head index per token. Indices are 1-based (``parent - 1`` is
            the head's node), and ``0`` means the token gets no incoming arc.
        labels: Relation label per token, used as the text on the arc that
            points from the head to that token.

    Returns:
        The figure the tree was drawn on.
    """
    figure, axes = plt.subplots(figsize=(32, 16))

    tree = nx.DiGraph()

    # One node per token, keyed by its 0-based position and carrying the
    # surface string as its "label" attribute.
    tree.add_nodes_from(
        (position, {"label": word}) for position, word in enumerate(words)
    )

    # Arcs run head -> dependent; tokens whose head is 0 are skipped.
    tree.add_edges_from(
        (head - 1, dependent, {"label": relation})
        for dependent, (head, relation) in enumerate(zip(parents, labels))
        if head != 0
    )

    # Node coordinates come from Graphviz's "dot" layout program.
    layout = nx.nx_agraph.graphviz_layout(tree, prog='dot')

    # Nodes themselves are invisible (size 0); each token is shown as text
    # inside a white box.
    token_text = nx.get_node_attributes(tree, 'label')
    text_box = dict(facecolor="white", pad=10)
    nx.draw(
        tree,
        layout,
        ax=axes,
        with_labels=True,
        labels=token_text,
        arrows=True,
        node_color='#ffffff',
        node_size=0,
        node_shape='s',
        font_size=24,
        bbox=text_box,
    )

    # Relation names are written on the arcs, kept horizontal.
    arc_text = nx.get_edge_attributes(tree, 'label')
    nx.draw_networkx_edge_labels(
        tree,
        layout,
        ax=axes,
        edge_labels=arc_text,
        rotate=False,
        alpha=0.9,
        font_size=18,
    )

    return figure


# HTML header for the page (passed to gr.HTML below): a centred title followed
# by an image loaded from the ltg/norbert3-base repository on Hugging Face.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""

# Example parse shown when the page first loads: one token per line, ten
# tab-separated columns (ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL,
# DEPS, MISC).
text = """1	President	President	PROPN	NNP	Number=Sing	5	nsubj	5:nsubj	_
2	Bush	Bush	PROPN	NNP	Number=Sing	1	flat	1:flat	_
3	on	on	ADP	IN	_	4	case	4:case	_
4	Tuesday	Tuesday	PROPN	NNP	Number=Sing	5	obl	5:obl:on	_
5	nominated	nominate	VERB	VBD	Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	0:root	_
6	two	two	NUM	CD	NumType=Card	7	nummod	7:nummod	_
7	individuals	individual	NOUN	NNS	Number=Plur	5	obj	5:obj	_
8	to	to	PART	TO	_	9	mark	9:mark	_
9	replace	replace	VERB	VB	VerbForm=Inf	5	advcl	5:advcl:to	_
10	retiring	retire	VERB	VBG	VerbForm=Ger	11	amod	11:amod	_
11	jurists	jurist	NOUN	NNS	Number=Plur	9	obj	9:obj	_
12	on	on	ADP	IN	_	14	case	14:case	_
13	federal	federal	ADJ	JJ	Degree=Pos	14	amod	14:amod	_
14	courts	court	NOUN	NNS	Number=Plur	11	nmod	11:nmod:on	_
15	in	in	ADP	IN	_	18	case	18:case	_
16	the	the	DET	DT	Definite=Def|PronType=Art	18	det	18:det	_
17	Washington	Washington	PROPN	NNP	Number=Sing	18	compound	18:compound	_
18	area	area	NOUN	NN	Number=Sing	14	nmod	14:nmod:in	SpaceAfter=No
19	.	.	PUNCT	.	_	5	punct	5:punct	_"""


def parse_conllu(conllu):
    """Split CoNLL-U text into one list of column strings per word line.

    Blank lines and ``#`` comment lines are ignored. Lines whose ID column is
    a range (``1-2``, a multiword token) or a decimal (``1.1``, an empty node)
    are skipped as well: they have no integer HEAD, and keeping them would
    shift the position-based indexing used to draw the tree.

    Args:
        conllu: The text to parse, with tab-separated columns.

    Returns:
        A list of rows, each a list of the line's tab-separated fields.
    """
    rows = []
    for line in conllu.split("\n"):
        if not line.strip() or line.startswith("#"):
            continue
        fields = line.split("\t")
        if "-" in fields[0] or "." in fields[0]:
            continue
        rows.append(fields)
    return rows


# Parse the sample once, then pull each column out of the shared rows.
_token_rows = parse_conllu(text)

forms = [row[1] for row in _token_rows]
lemmas = [row[2] for row in _token_rows]
upos = [row[3] for row in _token_rows]
xpos = [row[4] for row in _token_rows]
feats = [row[5] for row in _token_rows]
metadata = [row[9] for row in _token_rows]

# HEAD is the 1-based index of the governing token; 0 marks the root.
edges = [int(row[6]) for row in _token_rows]
edge_labels = [row[7] for row in _token_rows]

def render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels):
    """Build the token table as keyword arguments for ``gr.DataFrame``.

    The table has one column per token. Its rows are lemmas, UPOS tags, XPOS
    tags, and then one row per feature slot, where each ``Name=Value`` feature
    is rendered as the markdown string ``*Name:* Value``.

    Args:
        forms: List of token strings; these become the column headers.
        lemmas: List of lemmas, one per token.
        upos: List of UPOS tags, one per token.
        xpos: List of XPOS tags, one per token.
        feats: Feature strings, one per token, with features separated by
            ``|``. Parts without ``=`` (such as ``_``) are ignored.
        metadata: Unused; accepted so the full set of columns can be passed.
        edges: Unused; accepted so the full set of columns can be passed.
        edge_labels: Unused; accepted so the full set of columns can be passed.

    Returns:
        A dict with ``"headers"`` (an empty corner cell followed by the forms)
        and ``"value"`` (the list of table rows).
    """
    cells = []
    for feat in feats:
        cell = []
        for pair in feat.split("|"):
            # Split on the first "=" only, so a value that itself contains
            # "=" is kept whole instead of being cut short.
            name, separator, value = pair.partition("=")
            if separator:
                cell.append(f"*{name}:* {value}")
        cells.append(cell)

    # Always keep at least one feature row, including when there are no
    # tokens at all (``default`` avoids max() failing on an empty sequence).
    depth = max(1, max((len(cell) for cell in cells), default=0))

    # Transpose: row i holds the i-th feature of every token, "" where a
    # token has fewer features than the longest one.
    feat_rows = [
        [cell[i] if i < len(cell) else "" for cell in cells]
        for i in range(depth)
    ]

    headers = [""] + forms
    rows = [
        ["*LEMMAS:*"] + lemmas,
        ["*UPOS:*"] + upos,
        ["*XPOS:*"] + xpos,
        ["*UFEATS:*"] + feat_rows[0],
        *([""] + row for row in feat_rows[1:]),
    ]

    return {"value": rows, "headers": headers}


# Extra stylesheet for the page; its only rule hides the column sort buttons
# on the data-frame component.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""
# Page layout: header, an input row (text box and button on the left, example
# sentences on the right), then the token table and the dependency plot.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=1):
            source = gr.Textbox(
                label="Input sentence", placeholder="Write a sentence to parse", show_label=False, lines=1, max_lines=5, autofocus=True
            )
            submit = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1):
            dataset = gr.Dataset(components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ]
            )

    # NOTE(review): no event handlers are attached to `source`, `submit` or
    # `dataset` in the code visible here, so both outputs below only ever show
    # the pre-parsed sample sentence -- confirm whether wiring is still to come.
    table = gr.DataFrame(**render_table(forms, lemmas, upos, xpos, feats, metadata, edges, edge_labels), interactive=False, datatype="markdown")
    dependency_plot = gr.Plot(render_dependency_tree(forms, edges, edge_labels), container=False)

demo.launch()