cyberandy committed on
Commit
117cafd
·
1 Parent(s): 84d96ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -54
app.py CHANGED
@@ -3,6 +3,11 @@ from annotated_text import annotated_text
3
  from refined.inference.processor import Refined
4
  import requests
5
  import json
 
 
 
 
 
6
 
7
  # Page config
8
  st.set_page_config(
@@ -18,6 +23,8 @@ st.set_page_config(
18
 
19
  # Sidebar
20
  st.sidebar.image("logo-wordlift.png")
 
 
21
 
22
  # Initiate the model
23
  model_options = {"aida_model", "wikipedia_model_with_numbers"}
@@ -68,61 +75,71 @@ with st.form(key='my_form'):
68
  text_input = st.text_area(label='Enter a sentence')
69
  submit_button = st.form_submit_button(label='Analyze')
70
 
71
- # Process the text and extract the entities
72
  if text_input:
73
- entities = refined_model.process_text(text_input)
74
-
75
- entities_map = {}
76
- entities_data = {}
77
- for entity in entities:
78
- single_entity_list = str(entity).strip('][').replace("\'", "").split(', ')
79
- if len(single_entity_list) >= 2 and "wikidata" in single_entity_list[1]:
80
- entities_map[single_entity_list[0].strip()] = get_wikidata_id(single_entity_list[1])
81
- entity_data = get_entity_data(entities_map[single_entity_list[0].strip()]["link"])
82
- if entity_data is not None:
83
- entities_data[single_entity_list[0].strip()] = entity_data
84
-
85
- combined_entity_info_dictionary = dict([(k, [entities_map[k], entities_data[k] if k in entities_data else None]) for k in entities_map])
86
-
87
- if submit_button:
88
- # Prepare a list to hold the final output
89
- final_text = []
90
-
91
- # JSON-LD data
92
- json_ld_data = {
93
- "@context": "https://schema.org",
94
- "@type": "WebPage",
95
- "mentions": []
96
- }
97
-
98
- # Replace each entity in the text with its annotated version
99
- for entity_string, entity_info in entities_map.items():
100
- entity_data = entities_data.get(entity_string, None)
101
- entity_type = None
102
- if entity_data is not None:
103
- entity_type = entity_data.get("@type", None)
104
-
105
- # Use different colors based on the entity's type
106
- color = "#8ef" # Default color
107
- if entity_type == "Place":
108
- color = "#8AC7DB"
109
- elif entity_type == "Organization":
110
- color = "#ADD8E6"
111
- elif entity_type == "Person":
112
- color = "#67B7D1"
113
- elif entity_type == "Product":
114
- color = "#2ea3f2"
115
- elif entity_type == "CreativeWork":
116
- color = "#00BFFF"
117
- elif entity_type == "Event":
118
- color = "#1E90FF"
119
-
120
- entity_annotation = (entity_string, entity_info["id"], color)
121
- text_input = text_input.replace(entity_string, f'{{{str(entity_annotation)}}}', 1)
122
-
123
- # Add the entity to JSON-LD data
124
- entity_json_ld = combined_entity_info_dictionary[entity_string][1]
125
- json_ld_data["mentions"].append(entity_json_ld)
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Split the modified text_input into a list
128
  text_list = text_input.split("{")
 
3
  from refined.inference.processor import Refined
4
  import requests
5
  import json
6
+ import spacy
7
+
8
+ # Load German model
9
+ nlp_model_de = spacy.load("de_core_news_sm")
10
+ nlp_model_de.add_pipe("entityfishing", config={"language": "de"})
11
 
12
  # Page config
13
  st.set_page_config(
 
23
 
24
  # Sidebar
25
  st.sidebar.image("logo-wordlift.png")
26
+ language_options = {"English", "German"}
27
+ selected_language = st.sidebar.selectbox("Select the Language", list(language_options))
28
 
29
  # Initiate the model
30
  model_options = {"aida_model", "wikipedia_model_with_numbers"}
 
75
  text_input = st.text_area(label='Enter a sentence')
76
  submit_button = st.form_submit_button(label='Analyze')
77
 
 
78
  if text_input:
79
+ if selected_language == "German":
80
+ doc_de = nlp_model_de(text_input)
81
+ entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
82
+ # You will have to adjust the rest of the code since the format is different
83
+ # For the demo, we'll simply print them for now
84
+ for entity in entities:
85
+ st.write(entity)
86
+
87
+ else:
88
+ entities = refined_model.process_text(text_input)
89
+
90
+ entities = refined_model.process_text(text_input)
91
+
92
+ entities_map = {}
93
+ entities_data = {}
94
+ for entity in entities:
95
+ single_entity_list = str(entity).strip('][').replace("\'", "").split(', ')
96
+ if len(single_entity_list) >= 2 and "wikidata" in single_entity_list[1]:
97
+ entities_map[single_entity_list[0].strip()] = get_wikidata_id(single_entity_list[1])
98
+ entity_data = get_entity_data(entities_map[single_entity_list[0].strip()]["link"])
99
+ if entity_data is not None:
100
+ entities_data[single_entity_list[0].strip()] = entity_data
101
+
102
+ combined_entity_info_dictionary = dict([(k, [entities_map[k], entities_data[k] if k in entities_data else None]) for k in entities_map])
103
+
104
+ if submit_button:
105
+ # Prepare a list to hold the final output
106
+ final_text = []
107
+
108
+ # JSON-LD data
109
+ json_ld_data = {
110
+ "@context": "https://schema.org",
111
+ "@type": "WebPage",
112
+ "mentions": []
113
+ }
114
+
115
+ # Replace each entity in the text with its annotated version
116
+ for entity_string, entity_info in entities_map.items():
117
+ entity_data = entities_data.get(entity_string, None)
118
+ entity_type = None
119
+ if entity_data is not None:
120
+ entity_type = entity_data.get("@type", None)
121
+
122
+ # Use different colors based on the entity's type
123
+ color = "#8ef" # Default color
124
+ if entity_type == "Place":
125
+ color = "#8AC7DB"
126
+ elif entity_type == "Organization":
127
+ color = "#ADD8E6"
128
+ elif entity_type == "Person":
129
+ color = "#67B7D1"
130
+ elif entity_type == "Product":
131
+ color = "#2ea3f2"
132
+ elif entity_type == "CreativeWork":
133
+ color = "#00BFFF"
134
+ elif entity_type == "Event":
135
+ color = "#1E90FF"
136
+
137
+ entity_annotation = (entity_string, entity_info["id"], color)
138
+ text_input = text_input.replace(entity_string, f'{{{str(entity_annotation)}}}', 1)
139
+
140
+ # Add the entity to JSON-LD data
141
+ entity_json_ld = combined_entity_info_dictionary[entity_string][1]
142
+ json_ld_data["mentions"].append(entity_json_ld)
143
 
144
  # Split the modified text_input into a list
145
  text_list = text_input.split("{")