"""Gradio demo: parse a sentence and show its dependency tree and a token table."""

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import tabulate

from model import Parser

parser = Parser()


def parse(text):
    """Parse *text* and return the outputs for the plot and table components.

    Args:
        text: The sentence to hand to ``parser.parse``.

    Returns:
        A ``(figure, table)`` pair: the matplotlib figure produced by
        ``render_dependency_tree`` and a dict with ``"data"`` and
        ``"headers"`` keys for the DataFrame component.
    """
    output = parser.parse(text)
    dependency_tree = render_dependency_tree(
        output["forms"], output["heads"], output["deprels"]
    )
    # NOTE(review): the original call omitted the ``feats`` argument that
    # ``render_table`` requires (TypeError on every request). This assumes the
    # parser exposes morphological features under "feats" -- confirm against
    # ``model.Parser``.
    table = render_table(
        output["forms"],
        output["lemmas"],
        output["upos"],
        output["xpos"],
        output["feats"],
        output["ne"],
    )
    # ``render_table`` returns constructor kwargs ("value"/"headers"); as an
    # event *output* a DataFrame takes a dict keyed "data"/"headers" instead.
    return dependency_tree, {"data": table["value"], "headers": table["headers"]}


def render_dependency_tree(words, parents, labels):
    """Draw a dependency tree and return the matplotlib figure.

    Args:
        words: Token forms, one per token; used as node labels.
        parents: Head index per token, 1-based; ``0`` means the token gets no
            incoming edge.
        labels: Relation label per token, drawn on the edge from its head.

    Returns:
        The matplotlib figure holding the drawing.
    """
    fig, ax = plt.subplots(figsize=(32, 16))

    graph = nx.DiGraph()
    for index, word in enumerate(words):
        graph.add_node(index, label=word)
    for index, (parent, label) in enumerate(zip(parents, labels)):
        if parent != 0:
            # Heads are 1-based while nodes are keyed by 0-based position.
            graph.add_edge(parent - 1, index, label=label)

    # Node positions come from Graphviz' "dot" layout program.
    pos = nx.nx_agraph.graphviz_layout(graph, prog="dot")

    nx.draw(
        graph,
        pos,
        ax=ax,
        with_labels=True,
        labels=nx.get_node_attributes(graph, "label"),
        arrows=True,
        node_color="#ffffff",
        node_size=0,
        node_shape="s",
        font_size=24,
        bbox=dict(facecolor="white", pad=10),
    )
    nx.draw_networkx_edge_labels(
        graph,
        pos,
        ax=ax,
        edge_labels=nx.get_edge_attributes(graph, "label"),
        rotate=False,
        alpha=0.9,
        font_size=18,
    )

    # pyplot keeps every figure it creates alive until it is closed; close it
    # here so repeated requests do not accumulate figures. The Figure object
    # itself stays usable for the caller.
    plt.close(fig)
    return fig


description = """

Norsk UD (Bokmål og Nynorsk)

"""

# Example sentence shown before the user submits anything. The code below
# splits each line on tabs, so the column separators are written as explicit
# ``\t`` escapes to survive tools that normalise whitespace.
text = """1\tForretten\tforrett\tNOUN\t_\tDefinite=Def|Gender=Masc|Number=Sing\t2\tnsubj\t_\tname=O
2\tlyder\tlyde\tVERB\t_\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\tname=O
3\tnavnet\tnavn\tNOUN\t_\tDefinite=Def|Gender=Neut|Number=Sing\t5\tnmod\t_\tname=O
4\t"\t$"\tPUNCT\t_\t_\t5\tpunct\t_\tSpaceAfter=No|name=O
5\tCoquilles\tCoquilles\tPROPN\t_\t_\t2\tobj\t_\tname=B-PROD
6\tSt.\tSt.\tPROPN\t_\t_\t5\tflat:name\t_\tname=I-PROD
7\tJacques\tJacques\tPROPN\t_\t_\t5\tflat:name\t_\tname=I-PROD
8\tPrince\tPrince\tPROPN\t_\t_\t5\tflat:name\t_\tname=I-PROD
9\tde\tde\tX\t_\t_\t5\tflat:name\t_\tname=I-PROD
10\tNorvege\tNorvege\tPROPN\t_\t_\t5\tflat:name\t_\tSpaceAfter=No|name=I-PROD
11\t"\t$"\tPUNCT\t_\t_\t5\tpunct\t_\tSpaceAfter=No|name=O
12\t,\t$,\tPUNCT\t_\t_\t5\tpunct\t_\tname=O
13\tsom\tsom\tPRON\t_\tPronType=Rel\t16\tnsubj\t_\tname=O
14\ter\tvære\tAUX\t_\tMood=Ind|Tense=Pres|VerbForm=Fin\t16\tcop\t_\tname=O
15\tgrillet\tgrille\tADJ\t_\tDefinite=Ind|Gender=Neut|Number=Sing|VerbForm=Part\t16\tamod\t_\tname=O
16\tkamskjell\tkamskjell\tNOUN\t_\tDefinite=Ind|Gender=Neut|Number=Sing\t5\tacl:relcl\t_\tname=O
17\tpå\tpå\tADP\t_\t_\t19\tcase\t_\tname=O
18\tnorsk\tnorsk\tADJ\t_\tDefinite=Ind|Degree=Pos|Number=Sing\t19\tamod\t_\tname=O
19\tspekeskinke\tspekeskinke\tNOUN\t_\tDefinite=Ind|Gender=Fem|Number=Sing\t16\tnmod\t_\tname=O
20\t-\t$-\tPUNCT\t_\t_\t16\tpunct\t_\tname=O
21\tmed\tmed\tADP\t_\t_\t22\tcase\t_\tname=O
22\ttrøffelhonningvinaigrette\ttrøffelhonningvinaigrette\tNOUN\t_\tDefinite=Ind|Gender=Masc|Number=Sing\t16\tnmod\t_\tSpaceAfter=No|name=O
23\t,\t$,\tPUNCT\t_\t_\t22\tpunct\t_\tname=O
24\truccolasalat\truccolasalat\tNOUN\t_\tDefinite=Ind|Gender=Masc|Number=Sing\t22\tconj\t_\tname=O
25\tog\tog\tCCONJ\t_\t_\t27\tcc\t_\tname=O
26\tristede\triste\tADJ\t_\tNumber=Plur|VerbForm=Part\t27\tamod\t_\tname=O
27\tgresskarkjerner\tgresskarkjerne\tNOUN\t_\tDefinite=Ind|Gender=Fem|Number=Plur\t22\tconj\t_\tSpaceAfter=No|name=O
28\t.\t$.\tPUNCT\t_\t_\t2\tpunct\t_\tname=O"""

# Split the example once; blank lines and "#" comment lines are skipped.
_rows = [
    line.split("\t")
    for line in text.split("\n")
    if line and not line.startswith("#")
]
forms = [row[1] for row in _rows]
lemmas = [row[2] for row in _rows]
upos = [row[3] for row in _rows]
xpos = [row[4] for row in _rows]
feats = [row[5] for row in _rows]
# Keep only what follows the last "name=" in the tenth column.
ne = [row[9].split("name=")[-1] for row in _rows]
edges = [int(row[6]) for row in _rows]
edge_labels = [row[7] for row in _rows]
print(ne, flush=True)


def _feature_cells(feat):
    """Format one ``Name=Value|Name=Value`` string as a list of markdown cells."""
    cells = []
    for item in feat.split("|"):
        if "=" in item:  # items without "=" (e.g. "_") contribute no cell
            name, value = item.split("=")[:2]
            cells.append(f"*{name}:* {value}")
    return cells


def _entity_cell(tags, index):
    """Return the table cell for the entity tag at *index* of *tags*."""
    tag = tags[index]
    # Tags without a "-" have no label part to show (the original raised
    # IndexError on them); "O" renders as an empty cell.
    if tag == "O" or "-" not in tag:
        return ""
    label = tag.split("-")[1]
    if tag.startswith("B"):
        return f"<< {label} >>"
    continues = (
        tag.startswith("I")
        and index + 1 < len(tags)
        and tags[index + 1].startswith("I")
    )
    return label if continues else f"{label} >>"


def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the annotation table as keyword arguments for ``gr.DataFrame``.

    Args:
        forms: Token forms; they become the header row.
        lemmas: Lemma per token.
        upos: UPOS tag per token.
        xpos: XPOS tag per token.
        feats: Feature string per token, ``Name=Value`` items joined by ``|``.
        named_entities: Entity tag per token (``O``, ``B-X`` or ``I-X``).

    Returns:
        ``{"value": rows, "headers": header}`` where every row starts with a
        row-label cell followed by one cell per token.
    """
    per_token = [_feature_cells(feat) for feat in feats]
    # Always emit at least one feature row; ``default`` covers an empty
    # sentence, on which the original raised ValueError.
    height = max(1, max((len(cells) for cells in per_token), default=0))
    padded = [cells + [""] * (height - len(cells)) for cells in per_token]
    # Transpose: one row per feature slot, one column per token.
    feature_rows = [list(row) for row in zip(*padded)]
    if not feature_rows:
        feature_rows = [[] for _ in range(height)]

    entity_cells = [
        _entity_cell(named_entities, index) for index in range(len(named_entities))
    ]

    array = [
        [""] + list(forms),
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feature_rows[0],
        # The original lacked the comma after this starred generator, which
        # made Python subscript the generator and raise TypeError.
        *([""] + row for row in feature_rows[1:]),
        ["*NE:*"] + entity_cells,
    ]
    return {"value": array[1:], "headers": array[0]}


def _first_sample_value(sample):
    """Return the text of a clicked example.

    A clicked ``gr.Dataset`` sample arrives as a list with one value per
    component; forwarding that list unchanged would put its ``repr`` into the
    textbox. A plain string is passed through untouched.
    """
    if isinstance(sample, (list, tuple)):
        return sample[0] if sample else ""
    return sample


custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""

# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost; confirm that the table and plot belong below the row.
with gr.Blocks(theme="sudeepshouche/minimalist", css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ],
            )

    table = gr.DataFrame(
        **render_table(forms, lemmas, upos, xpos, feats, ne),
        interactive=False,
        datatype="markdown",
    )
    dependency_plot = gr.Plot(
        render_dependency_tree(forms, edges, edge_labels),
        container=False,
    )

    # Event listeners take component objects, not their variable names as
    # strings (the original passed strings such as "source").
    source.submit(
        fn=parse,
        inputs=[source],
        outputs=[dependency_plot, table],
        queue=True,
    )
    submit.click(
        fn=parse,
        inputs=[source],
        outputs=[dependency_plot, table],
        queue=True,
    )
    dataset.click(
        fn=_first_sample_value,
        inputs=[dataset],
        outputs=[source],
    ).then(
        fn=parse,
        inputs=[source],
        outputs=[dependency_plot, table],
        queue=True,
    )

demo.queue(max_size=32, concurrency_count=2)
demo.launch()