File size: 934 Bytes
acd7cf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def detect_main_language(text):
    """
    Identify whether ``text`` is predominantly Chinese or English.

    Only two kinds of characters are counted: characters in the range
    U+4E00..U+9FFF (treated as Chinese) and ASCII letters (treated as
    English). Every other character -- whitespace, digits, punctuation,
    other scripts -- is ignored and does not influence the result.

    :param text: the string to classify; must be a ``str`` (checked with
        ``assert``, so an ``AssertionError`` is raised otherwise when
        assertions are enabled)
    :return: ``'zh'`` when at least half of the counted characters are
        Chinese, otherwise ``'en'``; ``'en'`` is also returned when the
        text contains no Chinese or English characters at all
    """
    assert isinstance(text, str)

    chinese_count = 0
    english_count = 0
    # One pass is enough: characters that are neither Chinese nor ASCII
    # letters simply match no branch, so no pre-filtering is required.
    for char in text:
        if '\u4e00' <= char <= '\u9fff':
            chinese_count += 1
        elif char.isascii() and char.isalpha():
            english_count += 1

    total = chinese_count + english_count
    if total == 0:
        # Nothing to judge by; fall back to English.
        return 'en'

    # A tie (ratio exactly 0.5) is resolved in favour of Chinese.
    if chinese_count / total >= 0.5:
        return 'zh'
    return 'en'

def detect_if_chinese(text):
    """
    Report whether ``text`` contains at least one Chinese character.

    A character is considered Chinese when it lies in the range
    U+4E00..U+9FFF.

    :param text: the string to inspect; must be a ``str`` (checked with
        ``assert``)
    :return: ``True`` if any character falls in that range, else ``False``
    """
    assert isinstance(text, str)

    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            # The first hit settles it; no need to scan the rest.
            return True
    return False