# Copyright (c) 2024 Alibaba Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Abstract base class describing the tokenizer interface."""

from abc import ABC
from abc import abstractmethod
from typing import Iterable
from typing import List


class AbsTokenizer(ABC):
    """Interface for converting between a text line and a list of tokens.

    Concrete subclasses must implement both ``text2tokens`` and
    ``tokens2text``; the class cannot be instantiated until they do.
    """

    @abstractmethod
    def text2tokens(self, line: str) -> List[str]:
        """Split ``line`` into a list of string tokens.

        Args:
            line: The text to tokenize.

        Returns:
            The tokens produced from ``line``.

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own.
        """
        raise NotImplementedError

    @abstractmethod
    def tokens2text(self, tokens: Iterable[str]) -> str:
        """Join ``tokens`` back into a single text string.

        Args:
            tokens: The string tokens to combine.

        Returns:
            The text reconstructed from ``tokens``.

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own.
        """
        raise NotImplementedError

    def encode(self, line: str, **kwargs) -> List[str]:
        """Tokenize ``line`` by delegating to ``text2tokens``.

        Args:
            line: The text to tokenize.
            **kwargs: Accepted so callers may pass extra options; this
                default implementation ignores them.

        Returns:
            Whatever ``self.text2tokens(line)`` returns.
        """
        return self.text2tokens(line)