Spaces:
Running
Running
def detect_main_language(text):
    """
    Identify the dominant language of a piece of text.

    Characters in the range U+4E00..U+9FFF are counted as Chinese and
    ASCII letters are counted as English. Every other character
    (whitespace, digits, punctuation, other scripts) belongs to neither
    group and is ignored.

    :param text: the string to classify; must be a ``str``
    :return: ``'zh'`` when Chinese characters make up at least half of the
        counted characters, otherwise ``'en'`` (also returned when nothing
        was counted at all)
    """
    assert isinstance(text, str)

    zh_total = 0
    en_total = 0
    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            zh_total += 1
        elif ch.isascii() and ch.isalpha():
            en_total += 1

    counted = zh_total + en_total
    # With no Chinese or English letters there is no ratio to compute;
    # fall back to English.
    if counted == 0:
        return 'en'

    # A tie (ratio exactly 0.5) resolves to Chinese.
    return 'zh' if zh_total / counted >= 0.5 else 'en'
def detect_if_chinese(text):
    """
    Report whether the text contains any Chinese character.

    A character counts as Chinese when it lies in the range
    U+4E00..U+9FFF.

    :param text: the string to inspect; must be a ``str``
    :return: ``True`` if at least one such character is present,
        ``False`` otherwise
    """
    assert isinstance(text, str)

    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            return True
    return False