File size: 2,485 Bytes
56b3258
17cebb5
56b3258
61e2291
 
 
 
56b3258
61e2291
 
 
 
 
56b3258
 
 
 
61e2291
56b3258
 
 
 
 
61e2291
56b3258
 
 
61e2291
56b3258
61e2291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56b3258
6113569
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from transformers import pipeline
import gradio as gr

# Load the models
# Model identifiers handed to `pipeline`; the numbering matches the
# "Model 1/2/3" output panels of the demo.
model1, model2, model3 = (
    "gyr66/RoBERTa-ext-large-crf-chinese-finetuned-ner-v2",
    "gyr66/Ernie-3.0-large-chinese-finetuned-ner",
    "gyr66/Ernie-3.0-base-chinese-finetuned-ner",
)

# One "ner" pipeline per identifier, constructed in the same 1 -> 2 -> 3 order.
get_completion1, get_completion2, get_completion3 = (
    pipeline("ner", model_id) for model_id in (model1, model2, model3)
)

# Function to merge tokens
def merge_tokens(tokens):
    """Merge consecutive continuation tokens into whole-entity spans.

    A token whose ``entity`` label starts with ``I-`` is folded into the most
    recently emitted span when that span's label ends with the same entity
    type (the text after the ``I-`` prefix). Every other token starts a new
    span.

    Args:
        tokens: Iterable of dicts providing at least the keys ``entity``,
            ``word``, ``start``, ``end`` and ``score``. The dicts are left
            untouched; spans are built on shallow copies.

    Returns:
        A new list of dicts, one per span. A merged span keeps the first
        token's ``entity`` and ``start``, has the continuation words appended
        to ``word`` (with any ``##`` markers removed), takes ``end`` from its
        last token, and reports ``score`` as the arithmetic mean of the
        scores of all tokens folded into it.
    """
    merged_tokens = []
    # Running sum / count of the scores in the span at merged_tokens[-1], so
    # the reported score is a true mean rather than a pairwise average that
    # would weight later tokens more heavily.
    score_total = 0
    span_size = 0
    for token in tokens:
        continues_previous = (
            merged_tokens
            and token['entity'].startswith('I-')
            and merged_tokens[-1]['entity'].endswith(token['entity'][2:])
        )
        if continues_previous:
            # The token continues the previous entity: extend that span.
            last_token = merged_tokens[-1]
            last_token['word'] += token['word'].replace('##', '')
            last_token['end'] = token['end']
            score_total += token['score']
            span_size += 1
            last_token['score'] = score_total / span_size
        else:
            # Start a new span on a copy so the caller's dict is never mutated
            # by a later merge.
            merged_tokens.append(dict(token))
            score_total = token['score']
            span_size = 1
    return merged_tokens

# NER function
def ner(input):
    """Run all three NER pipelines on ``input`` and format their results.

    Args:
        input: The text to analyse.

    Returns:
        A 3-tuple ordered as model 1, model 2, model 3. Each element is a
        dict with the original ``text`` and an ``entities`` list whose items
        carry only the ``entity``, ``start`` and ``end`` of a merged span.
    """
    pipelines = (get_completion1, get_completion2, get_completion3)

    # Run every pipeline first, then merge the tokens of each result.
    raw_outputs = [run_pipeline(input) for run_pipeline in pipelines]
    merged_outputs = [merge_tokens(raw) for raw in raw_outputs]

    def to_highlight(spans):
        # Shape the output for Gradio: the text plus the labelled spans.
        return {
            "text": input,
            "entities": [
                {"entity": span['entity'], "start": span['start'], "end": span['end']}
                for span in spans
            ],
        }

    return tuple(to_highlight(spans) for spans in merged_outputs)

# Create the Gradio interface
# Single text box in, three highlighted-text panels out (one per model).
demo = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(label="Text to find entities", lines=2),
    # `ner` returns a 3-tuple; each element feeds the panel at the same index.
    # Plain string labels: they have no placeholders, so no f-prefix is needed.
    outputs=[
        gr.HighlightedText(label="NER Output - Model 1"),
        gr.HighlightedText(label="NER Output - Model 2"),
        gr.HighlightedText(label="NER Output - Model 3"),
    ],
    title="NER with Multiple Models",
    description="Extract entities using three different models.",
    allow_flagging="never",
    # NOTE(review): these samples are English while the model identifiers
    # mention "chinese" -- confirm they are the intended examples.
    examples=[
        "My name is Andrew, I'm building DeeplearningAI and I live in California",
        "My name is Poli, I live in Vienna and work at HuggingFace",
    ],
)

# Start the app as soon as the module is executed.
demo.launch(inline=False)