Update app.py
app.py
CHANGED
@@ -19,11 +19,27 @@ st.set_page_config(
 
 # Sidebar
 st.sidebar.image("logo-wordlift.png")
-language_options = {"English", "German"}
+language_options = {"English", "English - spaCy", "German"}
 selected_language = st.sidebar.selectbox("Select the Language", list(language_options), index=0)
 
 # Based on selected language, configure model, entity set, and citation options
-if selected_language != "German":
+if selected_language == "German" or selected_language == "English - spaCy":
+    selected_model_name = None
+    selected_entity_set = None
+
+    entity_fishing_citation = """
+    @misc{entity-fishing,
+    title = {entity-fishing},
+    publisher = {GitHub},
+    year = {2016--2023},
+    archivePrefix = {swh},
+    eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
+    }
+    """
+
+    with st.sidebar.expander('Citations'):
+        st.markdown(entity_fishing_citation)
+else:
     model_options = ["aida_model", "wikipedia_model_with_numbers"]
     entity_set_options = ["wikidata", "wikipedia"]
 
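A side note on the selector above: language_options is a set, so list(language_options) has no guaranteed order and index=0 will not reliably preselect "English". A minimal sketch of the same sidebar control backed by a plain list, using the same option names as in the change:

import streamlit as st

# A set has no defined iteration order, so index=0 can land on any option;
# a plain list keeps "English" as the stable default.
language_options = ["English", "English - spaCy", "German"]
selected_language = st.sidebar.selectbox("Select the Language", language_options, index=0)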
@@ -41,22 +57,6 @@ if selected_language != "German":
 
     with st.sidebar.expander('Citations'):
         st.markdown(refined_citation)
-else:
-    selected_model_name = None
-    selected_entity_set = None
-
-    entity_fishing_citation = """
-    @misc{entity-fishing,
-    title = {entity-fishing},
-    publisher = {GitHub},
-    year = {2016--2023},
-    archivePrefix = {swh},
-    eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
-    }
-    """
-
-    with st.sidebar.expander('Citations'):
-        st.markdown(entity_fishing_citation)
 
 @st.cache_resource  # Add the caching decorator
 def load_model(selected_language, model_name=None, entity_set=None):
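For context on the decorator above: st.cache_resource builds the return value once per unique combination of arguments and hands the same object back on every rerun, so changing the sidebar selection only triggers a fresh load the first time that combination is seen. A small self-contained sketch of that behavior, with a dict standing in for the real spaCy or ReFinED model:

import streamlit as st

@st.cache_resource
def load_model(selected_language, model_name=None, entity_set=None):
    # Heavy model construction would go here; the result is built once per
    # unique argument combination and shared across reruns and sessions.
    return {"language": selected_language, "model": model_name, "entity_set": entity_set}

first = load_model("English - spaCy")
again = load_model("English - spaCy")
assert first is again  # same cached object, not a copy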
@@ -66,6 +66,12 @@ def load_model(selected_language, model_name=None, entity_set=None):
         nlp_model_de.add_pipe("entityfishing")
 
         return nlp_model_de
+    elif selected_language == "English - spaCy":
+        # Load English-specific model
+        nlp_model_en = spacy.load("en_core_web_sm")
+        nlp_model_en.add_pipe("entityfishing")
+
+        return nlp_model_en
     else:
         # Load the pretrained model for other languages
         refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
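A minimal sketch of the spaCy branch added in this hunk, assuming the entityfishing pipe is provided by the spacyfishing extension and that en_core_web_sm is installed; the component calls an entity-fishing service, so the Wikidata annotations depend on that service being reachable:

import spacy

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("entityfishing")

doc = nlp("WordLift is a software company based in Rome.")
for ent in doc.ents:
    # kb_qid and url_wikidata are custom extensions set by the entityfishing component
    print(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata)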
@@ -101,20 +107,19 @@ entities_map = {}
 entities_data = {}
 
 if text_input:
-    if selected_language == "German":
+    if selected_language in ["German", "English - spaCy"]:
         doc_de = model(text_input)
         entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
         for entity in entities:
             entity_string, entity_type, wikidata_id, wikidata_url = entity
             if wikidata_url:
-                # Ensure correct format for the German model
+                # Ensure correct format for the German and English models
                 formatted_wikidata_url = wikidata_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
                 entities_map[entity_string] = {"id": wikidata_id, "link": formatted_wikidata_url}
                 entity_data = get_entity_data(formatted_wikidata_url)
 
                 if entity_data is not None:
                     entities_data[entity_string] = entity_data
-
     else:
         entities = model.process_text(text_input)
 
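For the remaining branch, a sketch of how the ReFinED model is typically loaded and queried. The import path and the span attributes are assumptions based on the public ReFinED package; from_pretrained, process_text, aida_model and wikidata are the names used in the code above:

from refined.inference.processor import Refined

refined_model = Refined.from_pretrained(model_name="aida_model", entity_set="wikidata")

spans = refined_model.process_text("England won the FIFA World Cup in 1966.")
for span in spans:
    # Each returned span carries the mention text and the predicted entity
    print(span.text, span.predicted_entity)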
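The replace call in the last hunk turns the human-facing wiki page URL into the Wikidata entity URI that the rest of the app expects; a tiny standalone example, where Q64 is only an illustrative item ID:

page_url = "https://www.wikidata.org/wiki/Q64"
entity_uri = page_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
print(entity_uri)  # http://www.wikidata.org/entity/Q64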