"""Gradio demo that links a marked entity mention to Wikidata/Wikipedia.

The sentence is passed to the `impresso-project/nel-hipe-multilingual`
seq2seq model; the last whitespace-separated token of the top generated
sequence is treated as a Wikidata QID and resolved to a Wikipedia title and
URL through the Wikidata `wbgetentities` API.
"""

import gradio as gr
import requests
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = "impresso-project/nel-hipe-multilingual"
WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
# Upper bound for the Wikidata lookup so a stalled connection cannot block
# the UI callback forever.
REQUEST_TIMEOUT_SECONDS = 10

# Fallback values reported when an entity cannot be resolved.
NIL_TITLE = "NIL"
NIL_URL = "None"

# One entry per clickable example in the UI.
EXAMPLE_SENTENCES = [
    "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul "
    "quantique qui promet d'augmenter exponentiellement les vitesses de calcul.",
    "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] "
    "Reuters [END])",
    "In the [START] year 1789 [END], the Estates-General was convened in France.",
    "[START] King Louis XVI, ruler of France [END], called for the meeting.",
    "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy.",
    "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions.",
    "Maximilien Robespierre, a leading member of the National Assembly, also participated.",
    "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate.",
    "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END].",
    "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], "
    "was shaping policies.",
    "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for "
    "the new American government.",
]

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).eval()
print("Model loaded successfully!")


def get_wikipedia_title(qid, language="en"):
    """Look up the Wikipedia sitelink of a Wikidata entity.

    Args:
        qid: Wikidata entity identifier to resolve.
        language: Wikipedia language prefix; the sitelink requested is
            ``f"{language}wiki"``.

    Returns:
        A ``(title, url)`` tuple taken from the entity's sitelink, or
        ``("NIL", "None")`` when the request fails, the response is not
        valid JSON, or the response has no such entity/sitelink.
    """
    site = f"{language}wiki"
    params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": qid,
        "props": "sitelinks/urls",
        "sitefilter": site,
    }

    try:
        response = requests.get(
            WIKIDATA_API_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS
        )
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not JSON: report the
        # entity as unresolved instead of crashing the UI callback.
        return NIL_TITLE, NIL_URL

    try:
        sitelink = data["entities"][qid]["sitelinks"][site]
        return sitelink["title"], sitelink["url"]
    except KeyError:
        return NIL_TITLE, NIL_URL


def disambiguate_sentence(sentence):
    """Link the marked entity in *sentence* to Wikidata and Wikipedia.

    Args:
        sentence: Input text; the UI asks for the mention to be wrapped in
            ``[START]`` and ``[END]``.

    Returns:
        A dict with the original sentence under ``"text"`` and, under
        ``"entities"``, a one-element list holding a
        ``"QID: ..., Title: ..., URL: ..."`` description string.
    """
    # Generate model outputs for the sentence.
    outputs = model.generate(
        **tokenizer([sentence], return_tensors="pt"),
        num_beams=5,
        num_return_sequences=5,
        max_new_tokens=30,
    )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Only the first returned sequence is used; the QID is assumed to be the
    # last token of that output.
    tokens = decoded[0].split() if decoded else []
    if tokens:
        qid = tokens[-1]
        title, url = get_wikipedia_title(qid)
    else:
        # Nothing was generated, so there is no identifier to look up.
        qid, title, url = NIL_TITLE, NIL_TITLE, NIL_URL

    entities = [f"QID: {qid}, Title: {title}, URL: {url}"]
    print(f"Entities: {entities}")
    return {"text": sentence, "entities": entities}


def nel_app_interface():
    """Build the Gradio interface for entity linking and launch it."""
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        # The line break inside the placeholder is written as an explicit
        # "\n" escape.
        placeholder="Enter your sentence here in the following format: // << We are going to [START] Paris [END]. >>"
        " // This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. \n// "
        "!Only one entity per sentence is supported at the moment!",
    )
    output_entities = gr.Textbox(label="Linked Entities")

    # Interface definition
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title="Entity Linking with impresso-project/nel-hipe-multilingual",
        description="Link entities using the `impresso-project/nel-hipe-multilingual` model under the hood!",
        # One row per example, each row holding the value for the single
        # input component.
        examples=[[sentence] for sentence in EXAMPLE_SENTENCES],
    )

    interface.launch()


if __name__ == "__main__":
    nel_app_interface()