Update ✨Entity Linking Application✨.py
Browse files- ✨Entity Linking Application✨.py +12 -13
✨Entity Linking Application✨.py
CHANGED
@@ -62,8 +62,8 @@ async def combination_method(name, session):
|
|
62 |
x = itertools_combinations(new_name, 2)
|
63 |
for i in x:
|
64 |
new_word = (i[0] + " " + i[1])
|
65 |
-
url = f"{new_word} site:en.wikipedia.org inurl:/wiki/ -inurl:?"
|
66 |
-
s = search(url, num_results = 12, lang="en")
|
67 |
for i in s:
|
68 |
data.add(i.split("/")[-1])
|
69 |
return data
|
@@ -73,7 +73,7 @@ async def single_method(name, session):
|
|
73 |
data = set()
|
74 |
new_name = name.replace("-", " ").replace("/", " ").split()
|
75 |
for i in new_name:
|
76 |
-
url = f"{i} site:en.wikipedia.org inurl:/wiki/ -inurl:?"
|
77 |
s = search(url, num_results = 12, lang="en")
|
78 |
for i in s:
|
79 |
data.add(i.split("/")[-1])
|
@@ -85,7 +85,7 @@ async def mains(name, single, combi):
|
|
85 |
qids = set()
|
86 |
|
87 |
async with aiohttp.ClientSession() as session:
|
88 |
-
url = f"{name} site:en.wikipedia.org inurl:/wiki/ -inurl:?"
|
89 |
s = search(url, num_results = 30, lang="en")
|
90 |
for i in s:
|
91 |
data.add(i.split("/")[-1])
|
@@ -267,17 +267,17 @@ async def main(name):
|
|
267 |
with open(f"/home/user/app/info_extraction/{name}.json", "w", encoding="utf-8") as flast:
|
268 |
json.dump(final_list, flast)
|
269 |
|
270 |
-
def check_sentence(sentence):
|
271 |
-
two_consecutive_uppercase = r"[A-Z]{2}"
|
272 |
-
uppercase_followed_by_fullstop = r"[A-Z]\."
|
273 |
|
274 |
-
if re.search(two_consecutive_uppercase, sentence):
|
275 |
-
return True
|
276 |
|
277 |
-
if re.search(uppercase_followed_by_fullstop, sentence):
|
278 |
-
return True
|
279 |
|
280 |
-
return False
|
281 |
|
282 |
chrome_driver_path = "chromedriver.exe"
|
283 |
chrome_path = r'"C:\Program Files\Google\Chrome\Application\chrome.exe"'
|
@@ -428,7 +428,6 @@ def main_cli():
|
|
428 |
st.write(f"Applying Candidate Selection module... (4/5) [{number}/{len(list_with_full_names)}]")
|
429 |
with open(f"/home/user/app/info_extraction/{i}.json", "r") as f:
|
430 |
json_file = json.load(f)
|
431 |
-
print(json_file)
|
432 |
lista = []
|
433 |
lista_1 = []
|
434 |
for element in json_file:
|
|
|
62 |
x = itertools_combinations(new_name, 2)
|
63 |
for i in x:
|
64 |
new_word = (i[0] + " " + i[1])
|
65 |
+
url = f"{new_word} site:en.wikipedia.org inurl:/wiki/ -inurl:? -inurl:Category: -inurl:Help: -inurl:Special: -inurl:File:"
|
66 |
+
s = search(url, num_results = 12, lang="en")
|
67 |
for i in s:
|
68 |
data.add(i.split("/")[-1])
|
69 |
return data
|
|
|
73 |
data = set()
|
74 |
new_name = name.replace("-", " ").replace("/", " ").split()
|
75 |
for i in new_name:
|
76 |
+
url = f"{i} site:en.wikipedia.org inurl:/wiki/ -inurl:? -inurl:Category: -inurl:Help: -inurl:Special: -inurl:File:"
|
77 |
s = search(url, num_results = 12, lang="en")
|
78 |
for i in s:
|
79 |
data.add(i.split("/")[-1])
|
|
|
85 |
qids = set()
|
86 |
|
87 |
async with aiohttp.ClientSession() as session:
|
88 |
+
url = f"{name} site:en.wikipedia.org inurl:/wiki/ -inurl:? -inurl:Category: -inurl:Help: -inurl:Special: -inurl:File:"
|
89 |
s = search(url, num_results = 30, lang="en")
|
90 |
for i in s:
|
91 |
data.add(i.split("/")[-1])
|
|
|
267 |
with open(f"/home/user/app/info_extraction/{name}.json", "w", encoding="utf-8") as flast:
|
268 |
json.dump(final_list, flast)
|
269 |
|
270 |
+
#def check_sentence(sentence):
|
271 |
+
# two_consecutive_uppercase = r"[A-Z]{2}"
|
272 |
+
# uppercase_followed_by_fullstop = r"[A-Z]\."
|
273 |
|
274 |
+
# if re.search(two_consecutive_uppercase, sentence):
|
275 |
+
# return True
|
276 |
|
277 |
+
# if re.search(uppercase_followed_by_fullstop, sentence):
|
278 |
+
# return True
|
279 |
|
280 |
+
# return False
|
281 |
|
282 |
chrome_driver_path = "chromedriver.exe"
|
283 |
chrome_path = r'"C:\Program Files\Google\Chrome\Application\chrome.exe"'
|
|
|
428 |
st.write(f"Applying Candidate Selection module... (4/5) [{number}/{len(list_with_full_names)}]")
|
429 |
with open(f"/home/user/app/info_extraction/{i}.json", "r") as f:
|
430 |
json_file = json.load(f)
|
|
|
431 |
lista = []
|
432 |
lista_1 = []
|
433 |
for element in json_file:
|