Spaces:

BramVanroy
/

text-to-amr

Running

File size: 6,182 Bytes

import base64
from collections import Counter

import graphviz
import penman
from mbart_amr.data.linearization import linearized2penmanstr
from penman.models.noop import NoOpModel
import streamlit as st
from transformers import LogitsProcessorList

from utils import get_resources, LANGUAGES, translate

import streamlit as st

# Browser-tab metadata (title and icon) for the demo page.
page_settings = {
    "page_title": "Text-to-AMR demo by Bram Vanroy",
    "page_icon": "👩‍💻",
}
st.set_page_config(**page_settings)

# Main heading rendered at the top of the app.
st.title("👩‍💻 Multilingual text to AMR ᵇᵉᵗᵃ")

# Input form: the widgets are grouped in a form and `submitted` reflects the form's submit button.
language_names = list(LANGUAGES.keys())
with st.form("input data"):
    # Two columns with a 4:1 width ratio: wide text box, narrow language picker.
    text_col, lang_col = st.columns((4, 1))
    text = text_col.text_input(label="Input text")
    # The first entry of LANGUAGES is preselected.
    src_lang = lang_col.selectbox(
        label="Language",
        options=language_names,
        index=0,
    )
    submitted = st.form_submit_button("Submit")

def _report_invalid_graph(message, penman_str, linearized, exc):
    """Show a failed parse/visualization: message, best-effort Penman, raw model output and the error.

    :param message: user-facing explanation of what went wrong
    :param penman_str: the (possibly invalid) Penman string derived from the model output
    :param linearized: the linearized model output that ``penman_str`` was built from
    :param exc: the exception that was raised; shown inside a collapsed expander
    """
    st.write(message)
    st.code(penman_str)
    st.write("The initial linearized output of the model was:")
    st.code(linearized)

    with st.expander("Error trace"):
        st.write(exc)


def _unique_node_labels(triples):
    """Map every variable of the graph to a display label that is unique within the graph.

    Concepts that are used by more than one variable get a running number, e.g.
    ``{"t": "talk-01 (1)", "t2": "talk-01 (2)"}``; concepts that occur once are kept as-is.
    A variable without a concept is labelled with its own variable name.

    :param triples: iterable of ``(source, role, target)`` triples
    :return: dict of variable name to display label
    """
    labels = {}
    for source, role, target in triples:
        if role == ":instance":
            labels[source] = source if target is None else target

    # Only labels shared by several variables need a disambiguating counter
    label_counts = Counter(labels.values())
    seen = Counter()
    for variable, label in labels.items():
        if label_counts[label] > 1:
            seen[label] += 1
            labels[variable] = f"{label} ({seen[label]})"

    return labels


def _build_digraph(triples, labels):
    """Build the graphviz visualization of the graph.

    Nodes are declared explicitly with a generated ID and the display text as ``label``.
    The display text must not be used as the node name in ``edge()``: graphviz splits the
    names given to ``edge()`` on colons (``node:port:compass``), which mangles values such
    as ``"16:30"`` or URLs. Declaring all variables up front also makes sure that nodes
    without any edge (e.g. a graph consisting of a single concept) are drawn.

    :param triples: iterable of ``(source, role, target)`` triples
    :param labels: dict of variable name to display label, see ``_unique_node_labels``
    :return: the populated ``graphviz.Digraph``
    :raises ValueError: if an edge has no target
    """
    digraph = graphviz.Digraph(node_attr={"color": "#3aafa9", "style": "rounded,filled", "shape": "box",
                                          "fontcolor": "white"})
    node_ids = {}

    def node_id(label):
        # One graphviz node per distinct display label; identical constants share a node
        if label is None:
            raise ValueError("The graph contains an edge without a target")
        if label not in node_ids:
            node_ids[label] = f"n{len(node_ids)}"
            digraph.node(node_ids[label], label=label)
        return node_ids[label]

    for label in labels.values():
        node_id(label)

    for source, role, target in triples:
        # Instance triples describe the nodes themselves, they are not edges
        if role != ":instance":
            # Variables are shown with their (unique) label, constants are shown as-is
            digraph.edge(node_id(labels.get(source, source)), node_id(labels.get(target, target)), label=role)

    return digraph


def create_download_link(img_bytes: bytes):
    """Return an HTML anchor that downloads the given PNG bytes, embedded as a base64 data URI."""
    encoded = base64.b64encode(img_bytes).decode("utf-8")
    return f'<a href="data:image/png;charset=utf-8;base64,{encoded}" download="amr-graph.png">Download graph</a>'


# Placeholder for status/error messages; emptied again once generation has finished
error_ct = st.empty()
if submitted:
    text = text.strip()
    if not text:
        error_ct.error("Text cannot be empty!", icon="⚠️")
    else:
        error_ct.info("Generating abstract meaning representation (AMR)...", icon="💻")
        # get_resources is asked for the multilingual resources for anything that is not English
        multilingual = src_lang != "English"
        model, tokenizer, logitsprocessor = get_resources(multilingual)
        gen_kwargs = {
            "max_length": model.config.max_length,
            "num_beams": model.config.num_beams,
            "logits_processor": LogitsProcessorList([logitsprocessor])
        }

        linearized = translate(text, src_lang, model, tokenizer, **gen_kwargs)
        penman_str = linearized2penmanstr(linearized)
        error_ct.empty()

        try:
            graph = penman.decode(penman_str, model=NoOpModel())
        except Exception as exc:
            _report_invalid_graph(
                "The generated graph is not valid so it cannot be visualized correctly. Below is the closest attempt"
                " to a valid graph but note that this is invalid Penman.",
                penman_str,
                linearized,
                exc,
            )
        else:
            try:
                visualized = _build_digraph(graph.triples, _unique_node_labels(graph.triples))
            except Exception as exc:
                _report_invalid_graph(
                    "The generated graph is not valid so it cannot be visualized correctly. Below is the closest"
                    " attempt to a valid graph but note that this is probably invalid Penman.",
                    penman_str,
                    linearized,
                    exc,
                )
            else:
                st.subheader("Graph visualization")
                st.graphviz_chart(visualized, use_container_width=True)

                # Download link
                img = visualized.pipe(format="png")
                st.markdown(create_download_link(img), unsafe_allow_html=True)

                # Additional info
                st.subheader("Model output and Penman graph")
                st.write("The linearized output of the model (after some post-processing) is:")
                st.code(linearized)
                st.write("When converted into Penman, it looks like this:")
                st.code(penman.encode(graph))


########################
# Information, socials #
########################
st.header("SignON 🤟")

# Project blurb: logo next to a short description. It is raw HTML, hence unsafe_allow_html below.
signon_blurb_html = """
<div style="display: flex">
    <img style="margin-right: 1em" alt="SignON logo" src="https://signon-project.eu/wp-content/uploads/2021/05/SignOn_Favicon_500x500px.png" width=64 height=64>
    <p><a href="https://signon-project.eu/" target="_blank" title="SignON homepage">SignON</a> aims to bridge the
     communication gap between deaf, hard-of-hearing and hearing people through an accessible translation service. 
     This service will translate between languages and modalities with particular attention for sign languages.</p>
</div>"""
st.markdown(signon_blurb_html, unsafe_allow_html=True)

# Short background on AMR and how it is used in the project (plain Markdown).
amr_background_md = """[Abstract meaning representation](https://aclanthology.org/W13-2322/) (AMR) 
is a semantic framework to describe meaning relations of sentences as graphs. In the SignON project, AMR is used as
 an interlingua to translate between modalities and languages. To this end, I built MBART models for the task of 
 generating linearized AMR representations from an input sentence, which is show-cased in this demo.
"""
st.markdown(amr_background_md)


st.header("Contact ✒️")

# Contact paragraph with links to the author's social profiles (Markdown).
contact_md = (
    "Would you like  additional functionality in the demo, do you have questions, or just want to get in touch?"
    " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
    " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!"
)
st.markdown(contact_md)