File size: 1,786 Bytes
57e0bea
 
 
 
0742d78
57e0bea
 
 
 
 
 
 
 
 
 
 
 
 
0742d78
57e0bea
 
0742d78
 
 
 
 
 
 
 
 
 
 
 
 
 
57e0bea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
from transformers import pipeline
from typing import List, Dict, Any

def merge_tokens(tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Merge continuation (``I-``) tokens into the entity that precedes them.

    A token is folded into the most recently emitted entry when its
    ``entity`` label starts with ``I-`` and that entry's label ends with the
    same entity type (for example ``I-PER`` following ``B-PER``).  Folding
    appends the token's ``word`` (with every ``##`` marker removed), moves
    ``end`` to the token's ``end`` and updates ``score`` to the arithmetic
    mean of all pieces folded into the entry so far.  Any other token starts
    a new entry, which keeps that token's own ``entity`` label.

    The input list and its dictionaries are never modified: each new entry
    is a shallow copy of the token that started it.

    Args:
        tokens: Token dictionaries providing at least the ``entity`` and
            ``word`` keys, plus ``end`` and ``score`` for tokens that take
            part in a merge.

    Returns:
        A new list of token dictionaries with continuation pieces merged.
    """
    merged_tokens: List[Dict[str, Any]] = []
    # Number of original tokens folded into each entry of merged_tokens,
    # needed to keep the score a true mean rather than a pairwise average.
    piece_counts: List[int] = []

    for token in tokens:
        entity = token['entity']
        continues_previous = (
            bool(merged_tokens)
            and entity.startswith('I-')
            and merged_tokens[-1]['entity'].endswith(entity[2:])
        )
        if continues_previous:
            last_token = merged_tokens[-1]
            count = piece_counts[-1]
            last_token['word'] += token['word'].replace('##', '')
            last_token['end'] = token['end']
            # Incremental mean: every merged piece carries equal weight.
            last_token['score'] = (
                last_token['score'] * count + token['score']
            ) / (count + 1)
            piece_counts[-1] = count + 1
        else:
            # Copy so that later merges do not write into the caller's dict.
            merged_tokens.append(dict(token))
            piece_counts.append(1)
    return merged_tokens

# Token-classification pipeline, built once at import time so that every
# request reuses the same loaded model.  No aggregation strategy is passed
# here; multi-token entities are stitched together by merge_tokens() instead.
get_completion = pipeline(
    task="ner",
    model="b3x0m/bert-xomlac-ner",
)

def ner(input: str) -> str:
    """Run NER on *input* and describe the recognized entities as text.

    The text is passed through ``get_completion``, the resulting tokens are
    combined by ``merge_tokens``, and every entity whose type has a display
    name is rendered as ``"<word> (<display name>)"``.  Entities of any other
    type are skipped.

    Args:
        input: Raw text to analyse.

    Returns:
        The rendered entities joined with ``", "``, or an empty string when
        nothing relevant was recognized.
    """
    # Vietnamese display names shown to the user, keyed by bare entity type.
    entity_map = {
        "PER": "tên người",
        "LOC": "địa điểm",
        "ORG": "tổ chức",
        "MISC": "vị trí",
    }

    result = []
    for token in merge_tokens(get_completion(input)):
        entity = token['entity']
        # merge_tokens() keeps the label of the first piece, so labels may
        # still carry a "B-"/"I-" tagging prefix (e.g. "B-PER").  Strip it so
        # the lookup is done on the bare type; unprefixed labels pass through.
        if entity.startswith(("B-", "I-")):
            entity = entity[2:]
        if entity in entity_map:  # keep only the entity types we can name
            result.append(f"{token['word']} ({entity_map[entity]})")

    return ", ".join(result)

# Custom stylesheet handed to the Blocks container.  It centers an
# <h1 id="title"> element; no such element is created in the code visible
# here, so the rule may currently have no effect.
css = '''
h1#title {
  text-align: center;
}
'''

theme = gr.themes.Soft()
demo = gr.Blocks(css=css, theme=theme)

with demo:
    # ner() returns a plain comma-separated string, so the result is shown in
    # a Textbox.  HighlightedText expects (text, label) pairs or a
    # {"text": ..., "entities": ...} dict and cannot render a bare string.
    interface = gr.Interface(
        fn=ner,
        inputs=[gr.Textbox(label="Input text", lines=10)],
        outputs=[gr.Textbox(label="Output")],
        allow_flagging="never",
        examples=["灵符山道场之外,玄玉子、赵成等诸多灵符山高层落座。", "李雷和韩梅梅今天一起去北京旅游。"],
    )

# Start the web server for the demo.
demo.launch()