Spaces:
Sleeping
Sleeping
Commit
·
3aa7322
1
Parent(s):
100b317
Update app
Browse files- app.py +90 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import spacy
|
3 |
+
import pandas as pd
|
4 |
+
import datetime
|
5 |
+
|
6 |
+
# Page-level configuration: render the app with Streamlit's "wide" layout.
st.set_page_config(layout="wide")

# Text pre-filled into the input area before the user types anything.
default_text = """Ita fac, mi Lucili; vindica te tibi, et tempus, quod adhuc aut auferebatur aut subripiebatur aut excidebat, collige et serva."""
|
9 |
+
|
10 |
+
|
11 |
+
def format_morph(morph):
    """Render morphological features as a single display string.

    Args:
        morph: An object exposing ``to_dict()`` that returns a mapping of
            feature names to values (callers in this file pass
            ``token.morph``).

    Returns:
        The features formatted as ``"key=value"`` pairs joined by ``", "``
        in the mapping's iteration order, or an empty string when the
        mapping is empty.
    """
    features = morph.to_dict()
    # Guard clause: nothing to show for tokens without any features.
    if not features:
        return ""
    return ", ".join(f"{name}={value}" for name, value in features.items())
|
17 |
+
|
18 |
+
|
19 |
+
def analyze_text(text, max_tokens=100):
    """Run the loaded pipeline over *text* and tabulate per-token annotations.

    Args:
        text: Raw text handed to the module-level ``nlp`` pipeline.
        max_tokens: Number of leading tokens to keep in the table. Defaults
            to 100, the limit advertised in the input box label.

    Returns:
        A ``pandas.DataFrame`` with one row per kept token and the columns
        ``text``, ``norm``, ``lower``, ``lemma``, ``pos``, ``tag``, ``dep``,
        ``morph`` and ``ent_type``.
    """
    columns = [
        "text",
        "norm",
        "lower",
        "lemma",
        "pos",
        "tag",
        "dep",
        "morph",
        "ent_type",
    ]
    doc = nlp(text)
    # Only the first ``max_tokens`` tokens are tabulated; the rest of the
    # document is ignored.
    rows = [
        (
            token.text,
            token.norm_,
            token.lower_,
            token.lemma_,
            token.pos_,
            token.tag_,
            token.dep_,
            format_morph(token.morph),
            token.ent_type_,
        )
        for token in doc[:max_tokens]
    ]
    return pd.DataFrame(rows, columns=columns)
|
51 |
+
|
52 |
+
|
53 |
+
st.title("LatinCy Text Analyzer")

# Using object notation
model_selectbox = st.sidebar.selectbox(
    "Choose model:",
    ("la_core_web_lg", "la_core_web_md", "la_core_web_sm"),
)


@st.cache_resource
def load_model(model_name):
    """Load the named spaCy pipeline, cached so reruns reuse the same object."""
    return spacy.load(model_name)


# Streamlit re-executes this script on every interaction; going through the
# cached loader avoids calling spacy.load again on each rerun.
nlp = load_model(model_selectbox)


@st.cache_data
def convert_df(df):
    """Serialize *df* as tab-separated text (no index), encoded as UTF-8 bytes."""
    return df.to_csv(index=False, sep="\t").encode("utf-8")


def create_timestamp():
    """Return the current local time formatted as ``YYYYMMDDHHMMSS``."""
    return datetime.datetime.now().strftime("%Y%m%d%H%M%S")


df = None

text = st.text_area(
    "Enter some text to analyze (max 100 tokens)", value=default_text, height=200
)
if st.button("Analyze"):
    df = analyze_text(text)
    st.text(f"Analyzed {len(df)} tokens with {model_selectbox} model.")
    st.dataframe(df, width=1000)

    # The export needs an analysis result, so it lives inside the button
    # branch: before the first click ``df`` is still None and cannot be
    # converted.
    csv = convert_df(df)

    # nb: clicking this button resets app! Open streamlit issue, as of 4.15.2023; cf. https://github.com/streamlit/streamlit/issues/4382
    st.markdown("*NB: Clicking the download button will reset the app after download!*")
    st.download_button(
        "Press to Download",
        csv,
        f"latincy-analysis-{create_timestamp()}.tsv",
        "text/csv",
        key="download-csv",
    )
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
la-core-web-lg @ https://huggingface.co/latincy/la_core_web_lg/resolve/main/la_core_web_lg-any-py3-none-any.whl#sha256=03e29fbaae0bf583610f6c042b874441aa213aee238f2a63f413bb608fe6f100
|
2 |
+
la-core-web-md @ https://huggingface.co/latincy/la_core_web_md/resolve/main/la_core_web_md-any-py3-none-any.whl#sha256=6c48e1494a8e892878a5381846fc8b3d7dc1c160b3ae2090098b856aa679bfd4
|
3 |
+
la-core-web-sm @ https://huggingface.co/latincy/la_core_web_sm/resolve/main/la_core_web_sm-any-py3-none-any.whl#sha256=0aecb1b9c9974b48b180092ab4e25b3bdba7c4b7b6cd47942e667cb054f07e04
|
4 |
+
pandas==1.5.3
|
5 |
+
spacy==3.5.2
|