File size: 843 Bytes
4aa700d
a640db1
 
 
 
8c22397
4aa700d
 
 
 
 
a640db1
 
 
 
 
 
 
 
 
4aa700d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
from typing import Dict, List, Any
from tokenizers.decoders import WordPiece


class EndpointHandler:
    """Callable request handler wrapping a token-classification pipeline.

    The model and tokenizer are loaded once at construction time; each call
    runs the pipeline on the ``inputs`` entry of the request payload.
    """

    def __init__(self, path="."):
        """Load the model and tokenizer from *path* and build the pipeline.

        Args:
            path: Directory (or identifier) handed to ``from_pretrained`` for
                both the model and the tokenizer.
        """
        model = AutoModelForTokenClassification.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path)
        self.pipeline = pipeline(
            'ner',
            model=model,
            tokenizer=tokenizer,
            aggregation_strategy='simple',
        )
        # Override the backend tokenizer's decoder with a WordPiece decoder.
        # NOTE(review): presumably so that aggregated entity text is decoded
        # with sub-word pieces merged; this needs a tokenizer exposing
        # ``backend_tokenizer`` -- confirm against the deployed model.
        self.pipeline.tokenizer.backend_tokenizer.decoder = WordPiece()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the pipeline on a request payload.

        Args:
            data: Request payload. ``data['inputs']`` is passed to the
                pipeline as its input. An optional ``data['parameters']``
                mapping is forwarded to the pipeline call as keyword
                arguments; a missing or empty value forwards nothing.

        Returns:
            Whatever the pipeline returns for the given input; the caller is
            expected to serialize it.

        Raises:
            KeyError: If ``data`` has no ``'inputs'`` entry.
        """
        inputs = data['inputs']
        # Read with .get() rather than pop() so the caller's payload is left
        # unmodified; ``or {}`` also covers an explicit ``"parameters": null``.
        parameters = data.get('parameters') or {}
        return self.pipeline(inputs, **parameters)