Spaces:

impresso-project
/

multilingual-entity-linking

Sleeping

App Files Files Community

emanuelaboros committed on Oct 17, 2024

Commit

048754e

1 Parent(s): f36584d

changed the loading of the model

Browse files

Files changed (1) hide show

app.py +23 -24

app.py CHANGED Viewed

@@ -1,11 +1,24 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import requests
-tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-hipe-multilingual")
-model = AutoModelForSeq2SeqLM.from_pretrained(
-    "impresso-project/nel-hipe-multilingual"
-).eval()
 print("Model loaded successfully!")
@@ -83,29 +96,15 @@ def get_wikipedia_title(qid, language="en"):
 def disambiguate_sentence(sentence):
     # Generate model outputs for the sentence
-    outputs = model.generate(
-        **tokenizer([sentence], return_tensors="pt"),
-        num_beams=5,
-        num_return_sequences=5,
-        max_new_tokens=30,
-    )
-    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    print(f"Decoded: {decoded}")
-    wikipedia_name = decoded[0]  # Assuming the entity name is in the output
-    qid = get_wikipedia_page_props(wikipedia_name)
-    print(f"QID: {qid}")
-    # Get Wikipedia title and URL
-    title, url = get_wikipedia_title(qid)
-    if qid == "NIL":
-        return "No entity found."
     # Create an HTML output with a clickable link
     entity_info = f"""<div>
-        <strong>Entity:</strong> {title} <br>
-        <strong>QID:</strong> {qid} <br>
-        <a href="{url}" target="_blank">Wikipedia Page</a>
     </div>
     """
     return entity_info
@@ -117,7 +116,7 @@ def nel_app_interface():
         label="Input Sentence",
         placeholder="Enter your sentence here:",
     )
-    output_entities = gr.HTML(label="Linked Entity")
     # Interface definition
     interface = gr.Interface(

 import gradio as gr
+from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import requests
+NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"
+# Load the tokenizer for the pre-trained NEL model; the model itself is loaded by the pipeline below.
+# Model card: https://huggingface.co/impresso-project/nel-mgenre-multilingual
+nel_tokenizer = AutoTokenizer.from_pretrained(
+    "impresso-project/nel-mgenre-multilingual"
+)
+nel_pipeline = pipeline(
+    "generic-nel",
+    model=NEL_MODEL_NAME,
+    tokenizer=nel_tokenizer,
+    trust_remote_code=True,
+    device="cpu",
+)
 print("Model loaded successfully!")
 def disambiguate_sentence(sentence):
     # Generate model outputs for the sentence
+    linked_entity = nel_pipeline(sentence)
+    linked_entity = linked_entity[0]
     # Create an HTML output with a clickable link
     entity_info = f"""<div>
+        <strong>Entity:</strong> {linked_entity['title']} <br>
+        <strong>QID:</strong> {linked_entity['qid']} <br>
+        <a href="{linked_entity['url']}" target="_blank">Wikipedia Page</a>
     </div>
     """
     return entity_info
         label="Input Sentence",
         placeholder="Enter your sentence here:",
     )
+    output_entities = gr.HTML(label="Linked Entities:")
     # Interface definition
     interface = gr.Interface(