Commit
·
1a30cc1
1
Parent(s):
44fb8bb
update app
Browse files
app.py
CHANGED
@@ -21,10 +21,6 @@ def get_wikipedia_page_props(input_str: str):
|
|
21 |
Returns:
|
22 |
str: The QID or "NIL" if the QID is not found.
|
23 |
"""
|
24 |
-
# # Check if the result is already in the cache
|
25 |
-
# if input_str in cache:
|
26 |
-
# return cache[input_str]
|
27 |
-
|
28 |
try:
|
29 |
# Preprocess the input string
|
30 |
page_name, language = input_str.split(" >> ")
|
@@ -57,14 +53,11 @@ def get_wikipedia_page_props(input_str: str):
|
|
57 |
if "wikibase_item" in page_props:
|
58 |
return page_props["wikibase_item"]
|
59 |
else:
|
60 |
-
|
61 |
-
return qid # fallback_to_openrefine(page_name, language)
|
62 |
else:
|
63 |
-
return qid
|
64 |
-
|
65 |
except Exception as e:
|
66 |
-
|
67 |
-
return qid # fallback_to_openrefine(page_name, language)
|
68 |
|
69 |
|
70 |
def get_wikipedia_title(qid, language="en"):
|
@@ -89,7 +82,6 @@ def get_wikipedia_title(qid, language="en"):
|
|
89 |
|
90 |
|
91 |
def disambiguate_sentence(sentence):
|
92 |
-
entities = []
|
93 |
# Generate model outputs for the sentence
|
94 |
outputs = model.generate(
|
95 |
**tokenizer([sentence], return_tensors="pt"),
|
@@ -99,18 +91,25 @@ def disambiguate_sentence(sentence):
|
|
99 |
)
|
100 |
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
101 |
print(f"Decoded: {decoded}")
|
102 |
-
wikipedia_name = decoded[0] # Assuming
|
103 |
qid = get_wikipedia_page_props(wikipedia_name)
|
104 |
-
print(f"
|
105 |
-
|
106 |
-
#
|
107 |
title, url = get_wikipedia_title(qid)
|
108 |
-
#
|
109 |
-
entity_info = f"QID: {qid}, Title: {title}, URL: {url}"
|
110 |
-
entities.append(entity_info)
|
111 |
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
|
116 |
def nel_app_interface():
|
@@ -122,7 +121,7 @@ def nel_app_interface():
|
|
122 |
"entity should be surrounded by `[START]` and `[END]`. // "
|
123 |
"!Only one entity per sentence is supported at the moment!",
|
124 |
)
|
125 |
-
output_entities = gr.
|
126 |
|
127 |
# Interface definition
|
128 |
interface = gr.Interface(
|
|
|
21 |
Returns:
|
22 |
str: The QID or "NIL" if the QID is not found.
|
23 |
"""
|
|
|
|
|
|
|
|
|
24 |
try:
|
25 |
# Preprocess the input string
|
26 |
page_name, language = input_str.split(" >> ")
|
|
|
53 |
if "wikibase_item" in page_props:
|
54 |
return page_props["wikibase_item"]
|
55 |
else:
|
56 |
+
return qid
|
|
|
57 |
else:
|
58 |
+
return qid
|
|
|
59 |
except Exception as e:
|
60 |
+
return qid
|
|
|
61 |
|
62 |
|
63 |
def get_wikipedia_title(qid, language="en"):
|
|
|
82 |
|
83 |
|
84 |
def disambiguate_sentence(sentence):
|
|
|
85 |
# Generate model outputs for the sentence
|
86 |
outputs = model.generate(
|
87 |
**tokenizer([sentence], return_tensors="pt"),
|
|
|
91 |
)
|
92 |
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
93 |
print(f"Decoded: {decoded}")
|
94 |
+
wikipedia_name = decoded[0] # Assuming the entity name is in the output
|
95 |
qid = get_wikipedia_page_props(wikipedia_name)
|
96 |
+
print(f"QID: {qid}")
|
97 |
+
|
98 |
+
# Get Wikipedia title and URL
|
99 |
title, url = get_wikipedia_title(qid)
|
|
|
|
|
|
|
100 |
|
101 |
+
if qid == "NIL":
|
102 |
+
return "No entity found."
|
103 |
+
|
104 |
+
# Create an HTML output with a clickable link
|
105 |
+
entity_info = f"""
|
106 |
+
<div>
|
107 |
+
<strong>Entity:</strong> {title} <br>
|
108 |
+
<strong>QID:</strong> {qid} <br>
|
109 |
+
<a href="{url}" target="_blank">Wikipedia Page</a>
|
110 |
+
</div>
|
111 |
+
"""
|
112 |
+
return entity_info
|
113 |
|
114 |
|
115 |
def nel_app_interface():
|
|
|
121 |
"entity should be surrounded by `[START]` and `[END]`. // "
|
122 |
"!Only one entity per sentence is supported at the moment!",
|
123 |
)
|
124 |
+
output_entities = gr.HTML(label="Linked Entity")
|
125 |
|
126 |
# Interface definition
|
127 |
interface = gr.Interface(
|