Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| import re | |
| from pysbd.abbreviation_replacer import AbbreviationReplacer | |
| from pysbd.between_punctuation import BetweenPunctuation | |
| from pysbd.lang.common import Common, Standard | |
| from pysbd.punctuation_replacer import replace_punctuation | |
class Chinese(Common, Standard):
    """Language profile for Chinese, built on the shared Common/Standard rules."""

    iso_code = 'zh'

    class AbbreviationReplacer(AbbreviationReplacer):
        """Abbreviation replacer configured with no sentence starters."""

        SENTENCE_STARTERS = []

    class BetweenPunctuation(BetweenPunctuation):
        """Applies ``replace_punctuation`` to text enclosed in 《…》 and 「…」."""

        def __init__(self, text):
            """Hand ``text`` to the base class initializer unchanged."""
            super().__init__(text)

        def replace(self):
            """Run the quote/bracket substitutions and return ``self.text``."""
            self.sub_punctuation_between_quotes_and_parens()
            return self.text

        def sub_punctuation_between_double_angled_quotation_marks(self):
            """Rewrite every 《…》 span in ``self.text`` via ``replace_punctuation``.

            The span body is made of runs of characters other than ``》`` and
            backslash, a doubled backslash, or a backslash followed by one
            character.
            """
            # NOTE(review): the lookahead + named backreference looks like an
            # atomic-group emulation; since the group is repeated, the
            # backreference only holds the last repetition, so empty spans or
            # spans containing backslashes may not match -- confirm intent.
            double_angle_span = re.compile(
                r"《(?=(?P<tmp>[^》\\]+|\\{2}|\\.)*)(?P=tmp)》"
            )
            self.text = double_angle_span.sub(replace_punctuation, self.text)

        def sub_punctuation_between_l_bracket(self):
            """Rewrite every 「…」 span in ``self.text`` via ``replace_punctuation``.

            Uses the same span grammar as the 《…》 variant, with ``」`` as the
            closing delimiter.
            """
            corner_bracket_span = re.compile(
                r"「(?=(?P<tmp>[^」\\]+|\\{2}|\\.)*)(?P=tmp)」"
            )
            self.text = corner_bracket_span.sub(replace_punctuation, self.text)

        def sub_punctuation_between_quotes_and_parens(self):
            """Apply the 《…》 substitution first, then the 「…」 substitution."""
            self.sub_punctuation_between_double_angled_quotation_marks()
            self.sub_punctuation_between_l_bracket()