import streamlit as st
from annotated_text import annotated_text
from refined.inference.processor import Refined
import requests
import json
# Sidebar
st.sidebar.image("logo-wordlift.png")
# Initiate the model
model_options = {"aida_model", "wikipedia_model_with_numbers"}
selected_model_name = st.sidebar.selectbox("Select the Model", list(model_options))
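# Per the ReFinED docs, "aida_model" is fine-tuned on the AIDA dataset for entity
# disambiguation, while "wikipedia_model_with_numbers" is trained on Wikipedia and
# also detects date/numerical spans.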
# Select entity_set
entity_set_options = {"wikidata", "wikipedia"}
selected_entity_set = st.sidebar.selectbox("Select the Entity Set", list(entity_set_options))
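# "wikidata" links against the full Wikidata entity set supported by ReFinED;
# "wikipedia" restricts linking to entities that have a Wikipedia page.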
@st.cache_resource # 👈 Add the caching decorator
def load_model(model_name, entity_set):
    # Load the pretrained model
    refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
    return refined_model
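# Note: the first call typically downloads the ReFinED model files, so the initial
# load can take a while; later runs reuse the cached resource.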
# Use the cached model
refined_model = load_model(selected_model_name, selected_entity_set)
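# st.cache_resource keeps the loaded model in memory across Streamlit reruns, so it
# is only instantiated once per model/entity-set combination.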
# Helper functions
def get_wikidata_id(entity_string):
    entity_list = entity_string.split("=")
    entity_id = str(entity_list[1])
    # Build the canonical Wikidata URI for the extracted QID
    entity_link = "http://www.wikidata.org/entity/" + entity_id
    return {"id": entity_id, "link": entity_link}
def get_entity_data(entity_link):
    try:
        response = requests.get(f'https://api.wordlift.io/id/{entity_link}')
        return response.json()
    except Exception as e:
        print(f"Exception when fetching data for entity: {entity_link}. Exception: {e}")
        return None
# Create the form
with st.form(key='my_form'):
    text_input = st.text_area(label='Enter a sentence')
    submit_button = st.form_submit_button(label='Analyze')
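# Inputs inside st.form are only sent to the script when the submit button is
# pressed, which avoids re-running the model on every keystroke.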
# Process the text and extract the entities
if text_input:
    entities = refined_model.process_text(text_input)
    entities_map = {}
    entities_data = {}
    for entity in entities:
        # Each predicted span is parsed from its string representation; the second
        # comma-separated element is expected to contain "wikidata=<QID>"
        single_entity_list = str(entity).strip('][').replace("\'", "").split(', ')
        if len(single_entity_list) >= 2 and "wikidata" in single_entity_list[1]:
            entities_map[single_entity_list[0].strip()] = get_wikidata_id(single_entity_list[1])
            entity_data = get_entity_data(entities_map[single_entity_list[0].strip()]["link"])
            if entity_data is not None:
                entities_data[single_entity_list[0].strip()] = entity_data
    combined_entity_info_dictionary = dict([(k, [entities_map[k], entities_data[k] if k in entities_data else None]) for k in entities_map])
    if submit_button:
        # Prepare a list to hold the final output
        final_text = []
        # JSON-LD data
        json_ld_data = {
            "@context": "https://schema.org",
            "@type": "WebPage",
            "mentions": []
        }
        # Replace each entity in the text with its annotated version
        for entity_string, entity_info in entities_map.items():
            entity_data = entities_data.get(entity_string, None)
            entity_type = None
            if entity_data is not None:
                entity_type = entity_data.get("@type", None)
            # Use different colors based on the entity's type
            color = "#8ef"  # Default color
            if entity_type == "Place":
                color = "#8AC7DB"
            elif entity_type == "Organization":
                color = "#ADD8E6"
            elif entity_type == "Person":
                color = "#67B7D1"
            elif entity_type == "Product":
                color = "#007aff"
            elif entity_type == "CreativeWork":
                color = "#00BFFF"
            elif entity_type == "Event":
                color = "#1E90FF"
            entity_annotation = (entity_string, entity_info["id"], color)
            text_input = text_input.replace(entity_string, f'{{{str(entity_annotation)}}}', 1)
            # Add the entity to JSON-LD data
            entity_json_ld = combined_entity_info_dictionary[entity_string][1]
            json_ld_data["mentions"].append(entity_json_ld)
        # Split the modified text_input into a list
        text_list = text_input.split("{")
        for item in text_list:
            if "}" in item:
                item_list = item.split("}")
                # eval() turns the tuple's string representation back into a
                # (text, id, color) tuple for annotated_text
                final_text.append(eval(item_list[0]))
                if len(item_list[1]) > 0:
                    final_text.append(item_list[1])
            else:
                final_text.append(item)
        # Pass the final_text to the annotated_text function
        annotated_text(*final_text)
        with st.expander("See annotations"):
            st.write(combined_entity_info_dictionary)
        with st.expander("Here is the final JSON-LD"):
            st.json(json_ld_data)  # Output JSON-LD
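# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py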