Spaces:
Runtime error
Runtime error
| import os | |
| from os import path | |
| import sys | |
| import json | |
| from .detector import Detector | |
| from .lang_detect_exception import ErrorCode, LangDetectException | |
| from .utils.lang_profile import LangProfile | |
class DetectorFactory(object):
    '''
    Language Detector Factory Class.

    This class manages the initialization and construction of Detector
    instances.

    Before using the language detection library, load profiles with
    load_profile(str) (or load_json_profile) and set the initialization
    parameters.  To run a detection, construct a Detector instance via
    create().

    See also Detector's sample code.
    '''

    # Class-level default seed; set_seed() stores an override on the instance.
    seed = None

    def __init__(self):
        # Maps each n-gram to a list of per-language probabilities; position i
        # in the list belongs to the language stored at self.langlist[i].
        self.word_lang_prob_map = {}
        # Names of the loaded language profiles, in load order.
        self.langlist = []

    @staticmethod
    def _list_profile_files(profile_directory):
        '''Return paths of the regular, non-hidden files in profile_directory.'''
        candidates = (
            path.join(profile_directory, entry)
            for entry in os.listdir(profile_directory)
            if not entry.startswith('.')
        )
        return [candidate for candidate in candidates if path.isfile(candidate)]

    @staticmethod
    def _open_profile(filename):
        '''Open a profile file for reading (as UTF-8 text on Python 3).'''
        if sys.version_info[0] < 3:
            return open(filename, 'r')
        return open(filename, 'r', encoding='utf-8')

    def load_profile(self, profile_directory):
        '''
        Load every language profile found in profile_directory.

        Hidden entries (names starting with '.') and anything that is not a
        regular file are ignored.  Each remaining file is parsed as JSON and
        its top-level keys are passed to LangProfile as keyword arguments.

        Raises LangDetectException with:
          * NeedLoadProfileError - the directory holds no usable profile file;
          * FileLoadError        - a profile file cannot be opened or read;
          * FormatError          - a profile file cannot be parsed/converted;
          * DuplicateLangError   - two profiles declare the same language.
        '''
        profile_files = self._list_profile_files(profile_directory)
        if not profile_files:
            raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Not found profile: ' + profile_directory)
        # Count only the files that are really loaded so that each probability
        # vector has exactly one slot per language.
        langsize = len(profile_files)
        for index, filename in enumerate(profile_files):
            try:
                with self._open_profile(filename) as f:
                    json_data = json.load(f)
                profile = LangProfile(**json_data)
                self.add_profile(profile, index, langsize)
            except IOError:
                raise LangDetectException(ErrorCode.FileLoadError, 'Cannot open "%s"' % filename)
            except LangDetectException:
                # Keep the specific error code chosen by add_profile instead
                # of masking it as a format error.
                raise
            except Exception:
                raise LangDetectException(ErrorCode.FormatError, 'Profile format error in "%s"' % filename)

    def load_json_profile(self, json_profiles):
        '''
        Load language profiles from a sequence of JSON strings.

        Raises LangDetectException with:
          * NeedLoadProfileError - fewer than two profiles were supplied;
          * FormatError          - a profile cannot be parsed/converted;
          * DuplicateLangError   - two profiles declare the same language.
        '''
        langsize = len(json_profiles)
        if langsize < 2:
            raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Need more than 2 profiles.')
        for index, json_profile in enumerate(json_profiles):
            try:
                json_data = json.loads(json_profile)
                profile = LangProfile(**json_data)
                self.add_profile(profile, index, langsize)
            except LangDetectException:
                # Keep the specific error code chosen by add_profile.
                raise
            except Exception:
                raise LangDetectException(ErrorCode.FormatError, 'Profile format error.')

    def add_profile(self, profile, index, langsize):
        '''
        Register one language profile.

        profile  -- object exposing `name`, `freq` (n-gram -> count mapping)
                    and `n_words` (sequence indexed by n-gram length - 1).
        index    -- slot of this language inside every probability vector.
        langsize -- length given to newly created probability vectors.

        Raises LangDetectException(DuplicateLangError) when a profile with
        the same name has already been added.
        '''
        lang = profile.name
        if lang in self.langlist:
            raise LangDetectException(ErrorCode.DuplicateLangError, 'Duplicate the same language profile.')
        self.langlist.append(lang)
        for word, count in profile.freq.items():
            if word not in self.word_lang_prob_map:
                self.word_lang_prob_map[word] = [0.0] * langsize
            length = len(word)
            # Only n-grams of length 1 to 3 receive a probability; longer or
            # empty keys keep their zero-filled vector.
            if 1 <= length <= 3:
                prob = 1.0 * count / profile.n_words[length - 1]
                self.word_lang_prob_map[word][index] = prob

    def clear(self):
        '''Forget every loaded profile.'''
        self.langlist = []
        self.word_lang_prob_map = {}

    def create(self, alpha=None):
        '''Construct Detector instance with smoothing parameter.'''
        detector = self._create_detector()
        if alpha is not None:
            detector.set_alpha(alpha)
        return detector

    def _create_detector(self):
        '''Build a Detector bound to this factory; profiles must be loaded.'''
        if not self.langlist:
            raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Need to load profiles.')
        return Detector(self)

    def set_seed(self, seed):
        '''Store seed on this factory instance (the class default stays as is).'''
        self.seed = seed

    def get_lang_list(self):
        '''Return a copy of the list of loaded language names.'''
        return list(self.langlist)
# Directory named 'profiles' that sits next to this module; init_factory()
# loads the language profiles from here.
PROFILES_DIRECTORY = os.path.join(os.path.dirname(__file__), 'profiles')

# Module-wide DetectorFactory, created on first use by init_factory().
_factory = None
def init_factory():
    '''
    Create the shared module-level factory on first use.

    A new DetectorFactory is loaded from PROFILES_DIRECTORY and stored in
    the module-level `_factory`.  Later calls are no-ops once that has
    succeeded.

    Any exception raised while loading profiles propagates to the caller.
    '''
    global _factory
    if _factory is None:
        # Publish the factory only after its profiles loaded successfully;
        # otherwise a failed load would leave a half-initialised factory
        # cached and every later call would skip loading altogether.
        factory = DetectorFactory()
        factory.load_profile(PROFILES_DIRECTORY)
        _factory = factory
def detect(text):
    '''
    Run a detection on text using the shared factory.

    Ensures the shared factory is initialised, feeds text to a fresh
    detector and returns the result of that detector's detect() call
    (presumably the most likely language; confirm against Detector).
    '''
    init_factory()
    language_detector = _factory.create()
    language_detector.append(text)
    return language_detector.detect()
def detect_langs(text):
    '''
    Return the detector's probabilities for text using the shared factory.

    Ensures the shared factory is initialised, feeds text to a fresh
    detector and returns the result of its get_probabilities() call
    (presumably the candidate languages with scores; confirm against
    Detector).
    '''
    init_factory()
    language_detector = _factory.create()
    language_detector.append(text)
    return language_detector.get_probabilities()