File size: 2,293 Bytes
615deca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
import spacy
import pandas as pd
import datetime

# Page-level setup: browser-tab title and wide layout.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other Streamlit command; keep it first and confirm against the
# installed Streamlit version before reordering.
st.set_page_config(page_title="Parsing Demo", layout="wide")
# Heading displayed in the sidebar.
st.sidebar.header("Parsing Demo")

# Sample Latin sentence used as the initial value of the input text area.
default_text = """Ita fac, mi Lucili; vindica te tibi, et tempus, quod adhuc aut auferebatur aut subripiebatur aut excidebat, collige et serva."""


def format_morph(morph):
    """Render morphological features as a comma-separated ``key=value`` list.

    Args:
        morph: Object exposing ``to_dict()`` that returns a feature mapping
            (this file passes ``token.morph``).

    Returns:
        The features formatted as ``"k1=v1, k2=v2"``, or an empty string
        when the mapping is empty.
    """
    features = morph.to_dict()
    if not features:
        return ""
    pairs = (f"{name}={value}" for name, value in features.items())
    return ", ".join(pairs)


def analyze_text(text):
    """Run the loaded pipeline on ``text`` and tabulate per-token annotations.

    Only the first 100 tokens of the parsed document are kept.

    Args:
        text: Raw text handed to the module-level ``nlp`` pipeline.

    Returns:
        A ``pandas.DataFrame`` with one row per token and the columns
        ``text``, ``norm``, ``lower``, ``lemma``, ``pos``, ``tag``, ``dep``,
        ``morph`` and ``ent_type``.
    """
    column_names = [
        "text",
        "norm",
        "lower",
        "lemma",
        "pos",
        "tag",
        "dep",
        "morph",
        "ent_type",
    ]
    parsed = nlp(text)
    # One tuple per token, in the same order as ``column_names``.
    records = [
        (
            tok.text,
            tok.norm_,
            tok.lower_,
            tok.lemma_,
            tok.pos_,
            tok.tag_,
            tok.dep_,
            format_morph(tok.morph),
            tok.ent_type_,
        )
        for tok in parsed[:100]
    ]
    return pd.DataFrame(records, columns=column_names)


st.title("LatinCy Text Analyzer")


@st.cache_resource
def load_model(model_name):
    """Load the spaCy pipeline named ``model_name`` once and reuse it.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be reloaded from disk on each rerun.

    Args:
        model_name: Name of an installed spaCy pipeline package.

    Returns:
        The loaded spaCy ``Language`` object, cached per ``model_name``.
    """
    return spacy.load(model_name)


# Sidebar control for picking which LatinCy pipeline to use.
model_selectbox = st.sidebar.selectbox(
    "Choose model:",
    ("la_core_web_lg", "la_core_web_md", "la_core_web_sm"),
)

# Module-level pipeline read by analyze_text().
nlp = load_model(model_selectbox)

# No analysis result exists until the "Analyze" button is pressed.
df = None

text = st.text_area(
    "Enter some text to analyze (max 100 tokens)", value=default_text, height=200
)
@st.cache_data
def convert_df(df):
    """Serialize ``df`` as UTF-8 encoded, tab-separated bytes without the index."""
    return df.to_csv(index=False, sep="\t").encode("utf-8")


def create_timestamp():
    """Return the current local time formatted as ``YYYYMMDDHHMMSS``."""
    return datetime.datetime.now().strftime("%Y%m%d%H%M%S")


if st.button("Analyze"):
    df = analyze_text(text)
    st.text(f"Analyzed {len(df)} tokens with {model_selectbox} model.")
    st.dataframe(df, width=1000)

    tsv_bytes = convert_df(df)

    # NB: clicking the download button resets the app. Open Streamlit issue as
    # of 4.15.2023; cf. https://github.com/streamlit/streamlit/issues/4382
    st.markdown("*NB: Clicking the download button will reset the app after download!*")
    st.download_button(
        "Press to Download",
        tsv_bytes,
        f"latincy-analysis-{create_timestamp()}.tsv",
        # The payload is tab-separated, so advertise the TSV media type
        # rather than "text/csv" to match the ".tsv" file name.
        "text/tab-separated-values",
        key="download-csv",
    )