|
import gradio as gr |
|
import tabulate |
|
import matplotlib.pyplot as plt |
|
import networkx as nx |
|
|
|
from model import Parser |
|
|
|
|
|
# Single module-level Parser (from the local `model` module), created once
# at import time and reused by `parse` for every submitted sentence.
parser = Parser()
|
|
|
def parse(text):
    """Analyse *text* and render both views of the result.

    The module-level ``parser`` is run on the input; its output is read as
    a mapping with the keys ``forms``, ``heads``, ``deprel``, ``lemmas``,
    ``upos``, ``xpos``, ``feats`` and ``ne``.

    Returns:
        A ``(dependency_tree, table)`` pair: the value returned by
        ``render_dependency_tree`` followed by the value returned by
        ``render_table``.
    """
    analysis = parser.parse(text)

    tree_figure = render_dependency_tree(
        analysis["forms"],
        analysis["heads"],
        analysis["deprel"],
    )

    table_columns = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    token_table = render_table(*(analysis[key] for key in table_columns))

    return tree_figure, token_table
|
|
|
|
|
def render_dependency_tree(words, parents, labels):
    """Draw a dependency tree and return it as a matplotlib figure.

    Args:
        words: Token forms. Token ``i`` (0-based) becomes graph node ``i``
            and is labelled with its form.
        parents: Head of each token as a 1-based token index; ``0`` means
            the token gets no incoming edge (it is the root).
        labels: Relation label of each token, drawn on the edge that runs
            from its head to the token.

    Returns:
        The figure containing the drawing. It has already been released
        from pyplot's figure registry, so the caller does not need to
        close it.
    """
    # Build the graph and its layout first so that a layout failure cannot
    # leave a half-initialised figure behind.
    G = nx.DiGraph()
    for i, word in enumerate(words):
        G.add_node(i, label=word)

    for i, (parent, label) in enumerate(zip(parents, labels)):
        if parent != 0:
            # Heads are 1-based while node ids are 0-based.
            G.add_edge(parent - 1, i, label=label)

    pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

    fig, ax = plt.subplots(figsize=(40, 16))
    try:
        nx.draw(
            G, pos, ax=ax, with_labels=True, labels=nx.get_node_attributes(G, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s', font_size=24,
            bbox=dict(facecolor="white", pad=10),
        )

        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos, ax=ax, edge_labels=edge_labels, rotate=False, alpha=0.9, font_size=18
        )
    finally:
        # pyplot keeps a reference to every figure it creates; without this
        # each call would leave one more open figure in a long-running
        # server. The Figure object itself stays usable after closing.
        plt.close(fig)

    return fig
|
|
|
|
|
# HTML header shown at the top of the page through gr.HTML(description):
# a centred title followed by a logo image.
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
|
|
|
# Example sentence used to pre-populate the table and the tree before the
# user submits anything. One token per line, ten whitespace-separated
# columns (CoNLL-U-style); the code below reads columns 1-7 and 9.
text = """1 Forretten forrett NOUN _ Definite=Def|Gender=Masc|Number=Sing 2 nsubj _ name=O
2 lyder lyde VERB _ Mood=Ind|Tense=Pres|VerbForm=Fin 0 root _ name=O
3 navnet navn NOUN _ Definite=Def|Gender=Neut|Number=Sing 5 nmod _ name=O
4 " $" PUNCT _ _ 5 punct _ SpaceAfter=No|name=O
5 Coquilles Coquilles PROPN _ _ 2 obj _ name=B-PROD
6 St. St. PROPN _ _ 5 flat:name _ name=I-PROD
7 Jacques Jacques PROPN _ _ 5 flat:name _ name=I-PROD
8 Prince Prince PROPN _ _ 5 flat:name _ name=I-PROD
9 de de X _ _ 5 flat:name _ name=I-PROD
10 Norvege Norvege PROPN _ _ 5 flat:name _ SpaceAfter=No|name=I-PROD
11 " $" PUNCT _ _ 5 punct _ SpaceAfter=No|name=O
12 , $, PUNCT _ _ 5 punct _ name=O
13 som som PRON _ PronType=Rel 16 nsubj _ name=O
14 er være AUX _ Mood=Ind|Tense=Pres|VerbForm=Fin 16 cop _ name=O
15 grillet grille ADJ _ Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part 16 amod _ name=O
16 kamskjell kamskjell NOUN _ Definite=Ind|Gender=Neut|Number=Sing 5 acl:relcl _ name=O
17 på på ADP _ _ 19 case _ name=O
18 norsk norsk ADJ _ Definite=Ind|Degree=Pos|Number=Sing 19 amod _ name=O
19 spekeskinke spekeskinke NOUN _ Definite=Ind|Gender=Fem|Number=Sing 16 nmod _ name=O
20 - $- PUNCT _ _ 16 punct _ name=O
21 med med ADP _ _ 22 case _ name=O
22 trøffelhonningvinaigrette trøffelhonningvinaigrette NOUN _ Definite=Ind|Gender=Masc|Number=Sing 16 nmod _ SpaceAfter=No|name=O
23 , $, PUNCT _ _ 22 punct _ name=O
24 ruccolasalat ruccolasalat NOUN _ Definite=Ind|Gender=Masc|Number=Sing 22 conj _ name=O
25 og og CCONJ _ _ 27 cc _ name=O
26 ristede riste ADJ _ Number=Plur|VerbForm=Part 27 amod _ name=O
27 gresskarkjerner gresskarkjerne NOUN _ Definite=Ind|Gender=Fem|Number=Plur 22 conj _ SpaceAfter=No|name=O
28 . $. PUNCT _ _ 2 punct _ name=O"""

# Split every token line exactly once. Splitting on any whitespace (rather
# than on "\t" only) keeps the sample parseable whether its columns are
# separated by tabs or by spaces; no field of this sample contains
# whitespace, so the columns come out the same either way.
_rows = [
    line.split()
    for line in text.split("\n")
    if line.strip() and not line.startswith("#")
]

forms = [row[1] for row in _rows]
lemmas = [row[2] for row in _rows]
upos = [row[3] for row in _rows]
xpos = [row[4] for row in _rows]
feats = [row[5] for row in _rows]
# The entity tag is whatever follows "name=" in the last column.
ne = [row[9].split('name=')[-1] for row in _rows]
# Head index of every token (0 for the root) and the matching relation.
edges = [int(row[6]) for row in _rows]
edge_labels = [row[7] for row in _rows]

print(ne, flush=True)
|
|
|
def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the ``value``/``headers`` keyword arguments for the token table.

    The table has one column per token (the header row holds the forms)
    and one row per annotation layer; morphological features take as many
    rows as the richest token needs.

    Args:
        forms: Token forms, used as column headers.
        lemmas: Lemma of each token.
        upos: Universal POS tag of each token.
        xpos: Language-specific POS tag of each token.
        feats: Feature string of each token, ``Name=Value`` pairs joined
            with ``|``; items without ``=`` (such as ``_``) are ignored.
        named_entities: Entity tag of each token: ``O``, or a prefix and a
            label joined with ``-`` (``B-...`` opens an entity, ``I-...``
            continues one).

    Returns:
        ``{"value": rows, "headers": header}`` where ``header`` is an empty
        corner cell followed by the forms and every row starts with its
        row title. Empty input yields a table with only the title column.
    """
    # One list of "*Name:* Value" cells per token. partition() keeps the
    # whole value even if it contains a further "=".
    feature_cells = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            name, sep, value = item.partition("=")
            if sep:
                cells.append(f"*{name}:* {value}")
        feature_cells.append(cells)

    # Always keep at least one feature row, including for empty input.
    depth = max(1, max((len(cells) for cells in feature_cells), default=0))
    feature_rows = [
        [cells[k] if k < len(cells) else "" for cells in feature_cells]
        for k in range(depth)
    ]

    entity_cells = []
    for i, tag in enumerate(named_entities):
        _, _, label = tag.partition("-")
        if not label:
            # "O" and any tag that carries no label render as an empty cell.
            entity_cells.append("")
            continue

        continued = (
            i + 1 < len(named_entities)
            and named_entities[i + 1].startswith("I")
        )
        if tag.startswith("B"):
            # A one-token entity must be closed in the same cell, otherwise
            # its "<<" would never get a matching ">>".
            entity_cells.append(f"<< {label}" if continued else f"<< {label} >>")
        elif tag.startswith("I") and continued:
            entity_cells.append(label)
        else:
            entity_cells.append(f"{label} >>")

    headers = [""] + list(forms)
    rows = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feature_rows[0],
        *([""] + row for row in feature_rows[1:]),
        ["*NE:*"] + entity_cells,
    ]

    return {"value": rows, "headers": headers}
|
|
|
|
|
# Extra stylesheet handed to gr.Blocks.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""

# NOTE(review): the nesting of the layout containers below was
# reconstructed (input panel and examples panel side by side in one row,
# results panel underneath) -- confirm against the intended page layout.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        # Free-text input and its submit button.
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        # Clickable example sentences.
        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ],
            )

    # Results, initially filled from the pre-parsed example sentence.
    with gr.Column(scale=1, variant="panel"):
        gr.Label("", show_label=False, container=False)
        table = gr.DataFrame(
            **render_table(forms, lemmas, upos, xpos, feats, ne),
            interactive=False,
            datatype="markdown",
        )
        dependency_plot = gr.Plot(
            render_dependency_tree(forms, edges, edge_labels), container=False
        )

    # Pressing Enter in the textbox and clicking the button both run parse.
    source.submit(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    submit.click(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    # Clicking an example first copies it into the textbox, then parses it.
    dataset.click(
        fn=lambda text: text[0], inputs=[dataset], outputs=[source]
    ).then(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )


demo.queue(max_size=32)
demo.launch()
|
|