File size: 7,439 Bytes
ada9d7c 0e59e1d 4a39fff 55f9b9d be30936 05373fd 55f9b9d 4a39fff aeabbe6 4a39fff 8f83e8d 4a39fff be2dc5c d74c2a3 4a39fff ef16d31 4a39fff ada9d7c 00d0840 ada9d7c 00d0840 0a00d43 ada9d7c 9d572f3 55f9b9d b0cf51d eb0f569 ba91ea6 be30936 2559909 be30936 2559909 be30936 2559909 be30936 55f9b9d ba91ea6 bc4505b 144e6f4 93bb100 be30936 ba91ea6 7e60b92 ada9d7c eb0f569 ada9d7c 6810129 55f9b9d 6810129 14600a6 6810129 14600a6 55f9b9d 6810129 3293495 6810129 aeabbe6 ada9d7c 55f9b9d cc397ec 55f9b9d cc397ec 55f9b9d 3293495 55f9b9d cc397ec 55f9b9d 242791e ada9d7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx
from model import Parser
# Single module-level parser instance, created once at import time;
# `parse` calls it for every request instead of building a new one.
parser = Parser()
def parse(text):
    """Run the parser on *text* and build both visualisations of the result.

    Args:
        text: The raw input sentence passed to ``parser.parse``.

    Returns:
        A ``(dependency_tree, table)`` pair: the figure produced by
        ``render_dependency_tree`` and the table payload produced by
        ``render_table``.
    """
    analysis = parser.parse(text)

    tree_figure = render_dependency_tree(
        analysis["forms"],
        analysis["heads"],
        analysis["deprel"],
    )

    # Column order matches the positional signature of render_table.
    table_columns = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    token_table = render_table(*[analysis[key] for key in table_columns])

    return tree_figure, token_table
def render_dependency_tree(words, parents, labels):
    """Draw a dependency tree and return it as a matplotlib figure.

    Args:
        words: Token strings, one per node, in sentence order.
        parents: Head index for each token. Indices are 1-based; ``0``
            marks a token without a head, which gets no incoming edge.
        labels: Label to draw on each token's incoming edge.

    Returns:
        The matplotlib figure containing the drawn tree. The figure is
        detached from pyplot's global registry before it is returned.
    """
    fig, ax = plt.subplots(figsize=(40, 16))
    try:
        # Nodes are keyed by 0-based token position and carry the word as
        # their display label.
        G = nx.DiGraph()
        for i, word in enumerate(words):
            G.add_node(i, label=word)

        # Edges point from head to dependent; heads are shifted from
        # 1-based indices to the 0-based node keys.
        for i, (parent, label) in enumerate(zip(parents, labels)):
            if parent != 0:
                G.add_edge(parent - 1, i, label=label)

        # Position nodes using Graphviz
        pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

        # Draw the graph: node markers are invisible (size 0); each word is
        # shown in a white text box instead.
        nx.draw(
            G, pos, ax=ax, with_labels=True,
            labels=nx.get_node_attributes(G, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s',
            font_size=24, bbox=dict(facecolor="white", pad=10),
        )

        # Draw edge labels
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos, ax=ax, edge_labels=edge_labels,
            rotate=False, alpha=0.9, font_size=18,
        )
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # a server calling this per request would accumulate figures
        # forever. Closing only unregisters the figure; the returned object
        # can still be rendered or saved by the caller.
        plt.close(fig)
    return fig
# HTML header for the demo page: a centred title followed by a logo image.
# It is passed to gr.HTML when the interface is built.
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
# Pre-parsed example sentence in CoNLL-U layout (ten columns per token line).
# It is used to fill the table and the plot before the user submits anything.
text = """1 Forretten forrett NOUN _ Definite=Def|Gender=Masc|Number=Sing 2 nsubj _ name=O
2 lyder lyde VERB _ Mood=Ind|Tense=Pres|VerbForm=Fin 0 root _ name=O
3 navnet navn NOUN _ Definite=Def|Gender=Neut|Number=Sing 5 nmod _ name=O
4 " $" PUNCT _ _ 5 punct _ SpaceAfter=No|name=O
5 Coquilles Coquilles PROPN _ _ 2 obj _ name=B-PROD
6 St. St. PROPN _ _ 5 flat:name _ name=I-PROD
7 Jacques Jacques PROPN _ _ 5 flat:name _ name=I-PROD
8 Prince Prince PROPN _ _ 5 flat:name _ name=I-PROD
9 de de X _ _ 5 flat:name _ name=I-PROD
10 Norvege Norvege PROPN _ _ 5 flat:name _ SpaceAfter=No|name=I-PROD
11 " $" PUNCT _ _ 5 punct _ SpaceAfter=No|name=O
12 , $, PUNCT _ _ 5 punct _ name=O
13 som som PRON _ PronType=Rel 16 nsubj _ name=O
14 er være AUX _ Mood=Ind|Tense=Pres|VerbForm=Fin 16 cop _ name=O
15 grillet grille ADJ _ Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part 16 amod _ name=O
16 kamskjell kamskjell NOUN _ Definite=Ind|Gender=Neut|Number=Sing 5 acl:relcl _ name=O
17 på på ADP _ _ 19 case _ name=O
18 norsk norsk ADJ _ Definite=Ind|Degree=Pos|Number=Sing 19 amod _ name=O
19 spekeskinke spekeskinke NOUN _ Definite=Ind|Gender=Fem|Number=Sing 16 nmod _ name=O
20 - $- PUNCT _ _ 16 punct _ name=O
21 med med ADP _ _ 22 case _ name=O
22 trøffelhonningvinaigrette trøffelhonningvinaigrette NOUN _ Definite=Ind|Gender=Masc|Number=Sing 16 nmod _ SpaceAfter=No|name=O
23 , $, PUNCT _ _ 22 punct _ name=O
24 ruccolasalat ruccolasalat NOUN _ Definite=Ind|Gender=Masc|Number=Sing 22 conj _ name=O
25 og og CCONJ _ _ 27 cc _ name=O
26 ristede riste ADJ _ Number=Plur|VerbForm=Part 27 amod _ name=O
27 gresskarkjerner gresskarkjerne NOUN _ Definite=Ind|Gender=Fem|Number=Plur 22 conj _ SpaceAfter=No|name=O
28 . $. PUNCT _ _ 2 punct _ name=O"""


def _read_conllu_rows(conllu):
    """Split CoNLL-U text into one list of column values per token line.

    Empty lines and ``#`` comment lines are skipped.
    """
    rows = []
    for line in conllu.split("\n"):
        if not line or line.startswith("#"):
            continue
        # Columns are tab-separated. If a line carries no tab at all (for
        # instance after the file was saved with tabs turned into spaces),
        # fall back to splitting on any whitespace so that indexing the
        # columns below does not fail.
        rows.append(line.split("\t") if "\t" in line else line.split())
    return rows


# Parse the example once and slice out the columns the demo needs, instead
# of re-splitting the whole text for every column.
_example_rows = _read_conllu_rows(text)

forms = [row[1] for row in _example_rows]
lemmas = [row[2] for row in _example_rows]
upos = [row[3] for row in _example_rows]
xpos = [row[4] for row in _example_rows]
feats = [row[5] for row in _example_rows]
# The entity tag is whatever follows the last "name=" in the final column.
ne = [row[9].split('name=')[-1] for row in _example_rows]
# Head indices (0 for the token without a head) and their relation labels.
edges = [int(row[6]) for row in _example_rows]
edge_labels = [row[7] for row in _example_rows]

print(ne, flush=True)
def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the per-token annotation table for a DataFrame component.

    Each token becomes one column; each annotation layer becomes one or
    more rows whose first cell is a markdown row title.

    Args:
        forms: Token strings; used as the column headers.
        lemmas: Lemma for each token.
        upos: UPOS value for each token.
        xpos: XPOS value for each token.
        feats: Feature string for each token, written as ``Name=Value``
            pairs joined by ``|``. Items without ``=`` (such as ``_``) are
            ignored.
        named_entities: Entity tag for each token, e.g. ``O``, ``B-PROD``,
            ``I-PROD``.

    Returns:
        A dict with ``"headers"`` (an empty corner cell followed by the
        forms) and ``"value"`` (the table rows). An empty sentence yields
        a table containing only the row titles.
    """
    # One list of formatted "*Name:* Value" cells per token. partition()
    # keeps the whole value even if it happens to contain another "=".
    feature_columns = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            name, sep, value = item.partition("=")
            if sep:
                cells.append(f"*{name}:* {value}")
        feature_columns.append(cells)

    # Transpose into rows, padding shorter tokens with empty cells. There
    # is always at least one row so the "*UFEATS:*" title has a place to
    # go, even when no token has features or there are no tokens at all.
    row_count = max(1, max((len(cells) for cells in feature_columns), default=0))
    feature_rows = [
        [cells[r] if r < len(cells) else "" for cells in feature_columns]
        for r in range(row_count)
    ]

    # Mark entity spans with "<<" on the first token and ">>" on the last.
    named_entities_converted = []
    for i, tag in enumerate(named_entities):
        _, sep, entity_type = tag.partition("-")
        if not sep:
            # "O" and any other tag without an entity type: no entity.
            named_entities_converted.append("")
            continue
        continues = (
            i + 1 < len(named_entities)
            and named_entities[i + 1].startswith("I")
        )
        if tag.startswith("B"):
            # A single-token entity is opened and closed in the same cell.
            cell = f"<< {entity_type}" if continues else f"<< {entity_type} >>"
        elif tag.startswith("I") and continues:
            cell = entity_type
        else:
            cell = f"{entity_type} >>"
        named_entities_converted.append(cell)

    array = [
        [""] + list(forms),
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feature_rows[0],
        *([""] + row for row in feature_rows[1:]),
        ["*NE:*"] + named_entities_converted,
    ]
    return {"value": array[1:], "headers": array[0]}
# Extra stylesheet handed to gr.Blocks through its `css` argument.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""
# Build the demo page: header, an input row (text box + example picker),
# and an output area showing the annotation table and the dependency plot.
# NOTE(review): the nesting below is reconstructed from a copy of the file
# that had lost its indentation; in particular, the output column is assumed
# to sit below the input row rather than inside it - confirm against the
# deployed layout.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                # Fixed typo in the user-facing placeholder ("sentende").
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ],
            )

    with gr.Column(scale=1, variant="panel"):
        gr.Label("", show_label=False, container=False)
        # Both outputs start out showing the pre-parsed example sentence.
        table = gr.DataFrame(
            **render_table(forms, lemmas, upos, xpos, feats, ne),
            interactive=False,
            datatype="markdown",
        )
        dependency_plot = gr.Plot(
            render_dependency_tree(forms, edges, edge_labels),
            container=False,
        )

    # Pressing Enter in the text box and clicking the button both parse the
    # current input.
    source.submit(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    submit.click(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    # Picking an example first copies its text into the input box, then
    # parses it.
    dataset.click(
        fn=lambda sample: sample[0], inputs=[dataset], outputs=[source]
    ).then(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )

demo.queue(max_size=32)
demo.launch()
|