diff --git a/agent/canvas.py b/agent/canvas.py index 41410e64e3d8531519130f5ea8a5451f67bd59c8..bfffabd59ce972d35f8c9ccdc0d00d03feb45ed3 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -133,7 +133,8 @@ class Canvas(ABC): "components": {} } for k in self.dsl.keys(): - if k in ["components"]:continue + if k in ["components"]: + continue dsl[k] = deepcopy(self.dsl[k]) for k, cpn in self.components.items(): @@ -158,7 +159,8 @@ class Canvas(ABC): def get_compnent_name(self, cid): for n in self.dsl["graph"]["nodes"]: - if cid == n["id"]: return n["data"]["name"] + if cid == n["id"]: + return n["data"]["name"] return "" def run(self, **kwargs): @@ -173,7 +175,8 @@ class Canvas(ABC): if kwargs.get("stream"): for an in ans(): yield an - else: yield ans + else: + yield ans return if not self.path: @@ -188,7 +191,8 @@ class Canvas(ABC): def prepare2run(cpns): nonlocal ran, ans for c in cpns: - if self.path[-1] and c == self.path[-1][-1]: continue + if self.path[-1] and c == self.path[-1][-1]: + continue cpn = self.components[c]["obj"] if cpn.component_name == "Answer": self.answer.append(c) @@ -197,7 +201,8 @@ class Canvas(ABC): if c not in without_dependent_checking: cpids = cpn.get_dependent_components() if any([cc not in self.path[-1] for cc in cpids]): - if c not in waiting: waiting.append(c) + if c not in waiting: + waiting.append(c) continue yield "*'{}'* is running...🕞".format(self.get_compnent_name(c)) ans = cpn.run(self.history, **kwargs) @@ -211,10 +216,12 @@ class Canvas(ABC): logging.debug(f"Canvas.run: {ran} {self.path}") cpn_id = self.path[-1][ran] cpn = self.get_component(cpn_id) - if not cpn["downstream"]: break + if not cpn["downstream"]: + break loop = self._find_loop() - if loop: raise OverflowError(f"Too much loops: {loop}") + if loop: + raise OverflowError(f"Too much loops: {loop}") if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]: switch_out = cpn["obj"].output()[1].iloc[0, 0] @@ -283,19 +290,22 @@ class Canvas(ABC): def 
_find_loop(self, max_loops=6): path = self.path[-1][::-1] - if len(path) < 2: return False + if len(path) < 2: + return False for i in range(len(path)): if path[i].lower().find("answer") >= 0: path = path[:i] break - if len(path) < 2: return False + if len(path) < 2: + return False - for l in range(2, len(path) // 2): - pat = ",".join(path[0:l]) + for loc in range(2, len(path) // 2): + pat = ",".join(path[0:loc]) path_str = ",".join(path) - if len(pat) >= len(path_str): return False + if len(pat) >= len(path_str): + return False loop = max_loops while path_str.find(pat) == 0 and loop >= 0: loop -= 1 @@ -303,7 +313,7 @@ class Canvas(ABC): return False path_str = path_str[len(pat)+1:] if loop < 0: - pat = " => ".join([p.split(":")[0] for p in path[0:l]]) + pat = " => ".join([p.split(":")[0] for p in path[0:loc]]) return pat + " => " + pat return False diff --git a/agent/component/__init__.py b/agent/component/__init__.py index 9fa77288bac65af0ff8f2c847d0bc74c8a598761..2bb8669f472c82bc874003f1364ca20e6cb39fba 100644 --- a/agent/component/__init__.py +++ b/agent/component/__init__.py @@ -39,3 +39,73 @@ def component_class(class_name): m = importlib.import_module("agent.component") c = getattr(m, class_name) return c + +__all__ = [ + "Begin", + "BeginParam", + "Generate", + "GenerateParam", + "Retrieval", + "RetrievalParam", + "Answer", + "AnswerParam", + "Categorize", + "CategorizeParam", + "Switch", + "SwitchParam", + "Relevant", + "RelevantParam", + "Message", + "MessageParam", + "RewriteQuestion", + "RewriteQuestionParam", + "KeywordExtract", + "KeywordExtractParam", + "Concentrator", + "ConcentratorParam", + "Baidu", + "BaiduParam", + "DuckDuckGo", + "DuckDuckGoParam", + "Wikipedia", + "WikipediaParam", + "PubMed", + "PubMedParam", + "ArXiv", + "ArXivParam", + "Google", + "GoogleParam", + "Bing", + "BingParam", + "GoogleScholar", + "GoogleScholarParam", + "DeepL", + "DeepLParam", + "GitHub", + "GitHubParam", + "BaiduFanyi", + "BaiduFanyiParam", + "QWeather", + 
"QWeatherParam", + "ExeSQL", + "ExeSQLParam", + "YahooFinance", + "YahooFinanceParam", + "WenCai", + "WenCaiParam", + "Jin10", + "Jin10Param", + "TuShare", + "TuShareParam", + "AkShare", + "AkShareParam", + "Crawler", + "CrawlerParam", + "Invoke", + "InvokeParam", + "Template", + "TemplateParam", + "Email", + "EmailParam", + "component_class" +] diff --git a/agent/component/base.py b/agent/component/base.py index 5825eba582ef3f76299c7d91f5327e8fb01b8a14..2660be7d37362e6ef2e7e656613de052d4b3d79e 100644 --- a/agent/component/base.py +++ b/agent/component/base.py @@ -428,7 +428,8 @@ class ComponentBase(ABC): def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]: o = getattr(self._param, self._param.output_var_name) if not isinstance(o, partial) and not isinstance(o, pd.DataFrame): - if not isinstance(o, list): o = [o] + if not isinstance(o, list): + o = [o] o = pd.DataFrame(o) if allow_partial or not isinstance(o, partial): @@ -440,7 +441,8 @@ class ComponentBase(ABC): for oo in o(): if not isinstance(oo, pd.DataFrame): outs = pd.DataFrame(oo if isinstance(oo, list) else [oo]) - else: outs = oo + else: + outs = oo return self._param.output_var_name, outs def reset(self): @@ -482,13 +484,15 @@ class ComponentBase(ABC): outs.append(pd.DataFrame([{"content": q["value"]}])) if outs: df = pd.concat(outs, ignore_index=True) - if "content" in df: df = df.drop_duplicates(subset=['content']).reset_index(drop=True) + if "content" in df: + df = df.drop_duplicates(subset=['content']).reset_index(drop=True) return df upstream_outs = [] for u in reversed_cpnts[::-1]: - if self.get_component_name(u) in ["switch", "concentrator"]: continue + if self.get_component_name(u) in ["switch", "concentrator"]: + continue if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval": o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1] if o is not None: @@ -532,7 +536,8 @@ class ComponentBase(ABC): 
reversed_cpnts.extend(self._canvas.path[-1]) for u in reversed_cpnts[::-1]: - if self.get_component_name(u) in ["switch", "answer"]: continue + if self.get_component_name(u) in ["switch", "answer"]: + continue return self._canvas.get_component(u)["obj"].output()[1] @staticmethod diff --git a/agent/component/categorize.py b/agent/component/categorize.py index 94f10c799163747e18a80aef3849302665869574..7b264f131fff9e25950a05b1f9b8b391cf67d06a 100644 --- a/agent/component/categorize.py +++ b/agent/component/categorize.py @@ -34,15 +34,18 @@ class CategorizeParam(GenerateParam): super().check() self.check_empty(self.category_description, "[Categorize] Category examples") for k, v in self.category_description.items(): - if not k: raise ValueError("[Categorize] Category name can not be empty!") - if not v.get("to"): raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!") + if not k: + raise ValueError("[Categorize] Category name can not be empty!") + if not v.get("to"): + raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!") def get_prompt(self): cate_lines = [] for c, desc in self.category_description.items(): - for l in desc.get("examples", "").split("\n"): - if not l: continue - cate_lines.append("Question: {}\tCategory: {}".format(l, c)) + for line in desc.get("examples", "").split("\n"): + if not line: + continue + cate_lines.append("Question: {}\tCategory: {}".format(line, c)) descriptions = [] for c, desc in self.category_description.items(): if desc.get("description"): diff --git a/agent/component/deepl.py b/agent/component/deepl.py index d5247735a81e2a8aee7adbad2d36e887fab76768..31e92729c37481ec841fc482cd488450b85c373a 100644 --- a/agent/component/deepl.py +++ b/agent/component/deepl.py @@ -14,7 +14,6 @@ # limitations under the License. 
# from abc import ABC -import re from agent.component.base import ComponentBase, ComponentParamBase import deepl diff --git a/agent/component/exesql.py b/agent/component/exesql.py index eac305e8b1b10cf6d82b08ec206fb597c0051a33..e73a393ed0477b61426bfea6a98f0caf77221f3a 100644 --- a/agent/component/exesql.py +++ b/agent/component/exesql.py @@ -46,8 +46,10 @@ class ExeSQLParam(ComponentParamBase): self.check_empty(self.password, "Database password") self.check_positive_integer(self.top_n, "Number of records") if self.database == "rag_flow": - if self.host == "ragflow-mysql": raise ValueError("The host is not accessible.") - if self.password == "infini_rag_flow": raise ValueError("The host is not accessible.") + if self.host == "ragflow-mysql": + raise ValueError("The host is not accessible.") + if self.password == "infini_rag_flow": + raise ValueError("The host is not accessible.") class ExeSQL(ComponentBase, ABC): diff --git a/agent/component/generate.py b/agent/component/generate.py index 555ca6b0249709d502addd3aa7f92d944747b57d..27f1ce2fdfb0616fb7a1f88b15ce1e1fd303bb39 100644 --- a/agent/component/generate.py +++ b/agent/component/generate.py @@ -51,11 +51,16 @@ class GenerateParam(ComponentParamBase): def gen_conf(self): conf = {} - if self.max_tokens > 0: conf["max_tokens"] = self.max_tokens - if self.temperature > 0: conf["temperature"] = self.temperature - if self.top_p > 0: conf["top_p"] = self.top_p - if self.presence_penalty > 0: conf["presence_penalty"] = self.presence_penalty - if self.frequency_penalty > 0: conf["frequency_penalty"] = self.frequency_penalty + if self.max_tokens > 0: + conf["max_tokens"] = self.max_tokens + if self.temperature > 0: + conf["temperature"] = self.temperature + if self.top_p > 0: + conf["top_p"] = self.top_p + if self.presence_penalty > 0: + conf["presence_penalty"] = self.presence_penalty + if self.frequency_penalty > 0: + conf["frequency_penalty"] = self.frequency_penalty return conf @@ -83,7 +88,8 @@ class 
Generate(ComponentBase): recall_docs = [] for i in idx: did = retrieval_res.loc[int(i), "doc_id"] - if did in doc_ids: continue + if did in doc_ids: + continue doc_ids.add(did) recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]}) @@ -108,7 +114,8 @@ class Generate(ComponentBase): retrieval_res = [] self._param.inputs = [] for para in self._param.parameters: - if not para.get("component_id"): continue + if not para.get("component_id"): + continue component_id = para["component_id"].split("@")[0] if para["component_id"].lower().find("@") >= 0: cpn_id, key = para["component_id"].split("@") @@ -142,7 +149,8 @@ class Generate(ComponentBase): if retrieval_res: retrieval_res = pd.concat(retrieval_res, ignore_index=True) - else: retrieval_res = pd.DataFrame([]) + else: + retrieval_res = pd.DataFrame([]) for n, v in kwargs.items(): prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt) @@ -164,9 +172,11 @@ class Generate(ComponentBase): return pd.DataFrame([res]) msg = self._canvas.get_history(self._param.message_history_window_size) - if len(msg) < 1: msg.append({"role": "user", "content": ""}) + if len(msg) < 1: + msg.append({"role": "user", "content": ""}) _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97)) - if len(msg) < 2: msg.append({"role": "user", "content": ""}) + if len(msg) < 2: + msg.append({"role": "user", "content": ""}) ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf()) if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns: @@ -185,9 +195,11 @@ class Generate(ComponentBase): return msg = self._canvas.get_history(self._param.message_history_window_size) - if len(msg) < 1: msg.append({"role": "user", "content": ""}) + if len(msg) < 1: + msg.append({"role": "user", "content": ""}) _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97)) - 
if len(msg) < 2: msg.append({"role": "user", "content": ""}) + if len(msg) < 2: + msg.append({"role": "user", "content": ""}) answer = "" for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()): res = {"content": ans, "reference": []} diff --git a/agent/component/rewrite.py b/agent/component/rewrite.py index 4257a81fda0188cd023979e879af65edc4fac26b..31390c6f6748fe2cb98272f1c368793b127c7747 100644 --- a/agent/component/rewrite.py +++ b/agent/component/rewrite.py @@ -95,7 +95,8 @@ class RewriteQuestion(Generate, ABC): hist = self._canvas.get_history(4) conv = [] for m in hist: - if m["role"] not in ["user", "assistant"]: continue + if m["role"] not in ["user", "assistant"]: + continue conv.append("{}: {}".format(m["role"].upper(), m["content"])) conv = "\n".join(conv) diff --git a/agent/component/switch.py b/agent/component/switch.py index 05daaf96fa27adc190c9e24f00f91b0c467e832b..01affe0c6c8d6ab89a3d23361638a1ec9e1c54da 100644 --- a/agent/component/switch.py +++ b/agent/component/switch.py @@ -41,7 +41,8 @@ class SwitchParam(ComponentParamBase): def check(self): self.check_empty(self.conditions, "[Switch] conditions") for cond in self.conditions: - if not cond["to"]: raise ValueError(f"[Switch] 'To' can not be empty!") + if not cond["to"]: + raise ValueError("[Switch] 'To' can not be empty!") class Switch(ComponentBase, ABC): @@ -51,7 +52,8 @@ class Switch(ComponentBase, ABC): res = [] for cond in self._param.conditions: for item in cond["items"]: - if not item["cpn_id"]: continue + if not item["cpn_id"]: + continue if item["cpn_id"].find("begin") >= 0: continue cid = item["cpn_id"].split("@")[0] @@ -63,7 +65,8 @@ class Switch(ComponentBase, ABC): for cond in self._param.conditions: res = [] for item in cond["items"]: - if not item["cpn_id"]:continue + if not item["cpn_id"]: + continue cid = item["cpn_id"].split("@")[0] if item["cpn_id"].find("@") > 0: cpn_id, key = item["cpn_id"].split("@") @@ -107,22 +110,22 @@ class 
Switch(ComponentBase, ABC): elif operator == ">": try: return True if float(input) > float(value) else False - except Exception as e: + except Exception: return True if input > value else False elif operator == "<": try: return True if float(input) < float(value) else False - except Exception as e: + except Exception: return True if input < value else False elif operator == "≥": try: return True if float(input) >= float(value) else False - except Exception as e: + except Exception: return True if input >= value else False elif operator == "≤": try: return True if float(input) <= float(value) else False - except Exception as e: + except Exception: return True if input <= value else False raise ValueError('Not supported operator' + operator) \ No newline at end of file diff --git a/agent/component/template.py b/agent/component/template.py index 8964752941c366b34fcae68102bc567f15351e32..140688048b74b5ad150bc5033c3bf1fc490aea31 100644 --- a/agent/component/template.py +++ b/agent/component/template.py @@ -47,7 +47,8 @@ class Template(ComponentBase): self._param.inputs = [] for para in self._param.parameters: - if not para.get("component_id"): continue + if not para.get("component_id"): + continue component_id = para["component_id"].split("@")[0] if para["component_id"].lower().find("@") >= 0: cpn_id, key = para["component_id"].split("@") diff --git a/agent/test/client.py b/agent/test/client.py index be9115290cf45c8d2d73152b745fb978c92793d3..1ab4db386ee0279f6f31a2cf612c16f9ecf59636 100644 --- a/agent/test/client.py +++ b/agent/test/client.py @@ -43,6 +43,7 @@ if __name__ == '__main__': else: print(ans["content"]) - if DEBUG: print(canvas.path) + if DEBUG: + print(canvas.path) question = input("\n==================== User =====================\n> ") canvas.add_user_input(question) diff --git a/api/apps/api_app.py b/api/apps/api_app.py index 3e8520a682c8f1b7de3a1b1553fceeafc7aa7065..3f9793b1028bc71c2c9dc236667f1856d291ed68 100644 --- a/api/apps/api_app.py +++ 
b/api/apps/api_app.py @@ -142,7 +142,6 @@ def set_conversation(): if not objs: return get_json_result( data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) - req = request.json try: if objs[0].source == "agent": e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id) @@ -188,7 +187,8 @@ def completion(): e, conv = API4ConversationService.get_by_id(req["conversation_id"]) if not e: return get_data_error_result(message="Conversation not found!") - if "quote" not in req: req["quote"] = False + if "quote" not in req: + req["quote"] = False msg = [] for m in req["messages"]: @@ -197,7 +197,8 @@ def completion(): if m["role"] == "assistant" and not msg: continue msg.append(m) - if not msg[-1].get("id"): msg[-1]["id"] = get_uuid() + if not msg[-1].get("id"): + msg[-1]["id"] = get_uuid() message_id = msg[-1]["id"] def fillin_conv(ans): @@ -674,11 +675,13 @@ def completion_faq(): e, conv = API4ConversationService.get_by_id(req["conversation_id"]) if not e: return get_data_error_result(message="Conversation not found!") - if "quote" not in req: req["quote"] = True + if "quote" not in req: + req["quote"] = True msg = [] msg.append({"role": "user", "content": req["word"]}) - if not msg[-1].get("id"): msg[-1]["id"] = get_uuid() + if not msg[-1].get("id"): + msg[-1]["id"] = get_uuid() message_id = msg[-1]["id"] def fillin_conv(ans): diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py index b21ca87dc2afdb2235e0b09030d145a60fe6ff47..0d307ed491d9a48815215b479a644b73047137e8 100644 --- a/api/apps/canvas_app.py +++ b/api/apps/canvas_app.py @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import logging import json import traceback -from functools import partial from flask import request, Response from flask_login import login_required, current_user from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService @@ -60,7 +58,8 @@ def rm(): def save(): req = request.json req["user_id"] = current_user.id - if not isinstance(req["dsl"], str): req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False) + if not isinstance(req["dsl"], str): + req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False) req["dsl"] = json.loads(req["dsl"]) if "id" not in req: @@ -153,7 +152,8 @@ def run(): return resp for answer in canvas.run(stream=False): - if answer.get("running_status"): continue + if answer.get("running_status"): + continue final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else "" canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id}) if final_ans.get("reference"): diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index 1b812fd809a9d2489b72d154d7d08e16a585b0d9..7786684dbfd8e687d6eb9acd362bf0bcfa3f7332 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -237,7 +237,8 @@ def create(): e, kb = KnowledgebaseService.get_by_id(doc.kb_id) if not e: return get_data_error_result(message="Knowledgebase not found!") - if kb.pagerank: d["pagerank_fea"] = kb.pagerank + if kb.pagerank: + d["pagerank_fea"] = kb.pagerank embd_id = DocumentService.get_embd_id(req["doc_id"]) embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 5da16dc0374eaad88c83f54806c690c7ab5d4c14..91f4bc8525e5cf103de6b267301a99539977d1a9 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -281,10 +281,12 @@ def thumbup(): if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant": if up_down: msg["thumbup"] = True - if "feedback" in msg: del msg["feedback"] 
+ if "feedback" in msg: + del msg["feedback"] else: msg["thumbup"] = False - if feedback: msg["feedback"] = feedback + if feedback: + msg["feedback"] = feedback break ConversationService.update_by_id(conv["id"], conv) diff --git a/api/apps/dialog_app.py b/api/apps/dialog_app.py index 32bd62a0cba3b4558bbd8c789c654663cb398bff..0accb04a6553dd94c75fdf95d0396aa6a67e01ed 100644 --- a/api/apps/dialog_app.py +++ b/api/apps/dialog_app.py @@ -37,10 +37,12 @@ def set_dialog(): top_n = req.get("top_n", 6) top_k = req.get("top_k", 1024) rerank_id = req.get("rerank_id", "") - if not rerank_id: req["rerank_id"] = "" + if not rerank_id: + req["rerank_id"] = "" similarity_threshold = req.get("similarity_threshold", 0.1) vector_similarity_weight = req.get("vector_similarity_weight", 0.3) - if vector_similarity_weight is None: vector_similarity_weight = 0.3 + if vector_similarity_weight is None: + vector_similarity_weight = 0.3 llm_setting = req.get("llm_setting", {}) default_prompt = { "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。 diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 35dd84ebdaa9a9f03e63355f437ded3f0facd476..deb2fbeadaa663b0ac4da31a87dd4612e7c5b8ec 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License # -import json import os.path import pathlib import re @@ -90,7 +89,8 @@ def web_crawl(): raise LookupError("Can't find this knowledgebase!") blob = html2pdf(url) - if not blob: return server_error_response(ValueError("Download failure.")) + if not blob: + return server_error_response(ValueError("Download failure.")) root_folder = FileService.get_root_folder(current_user.id) pf_id = root_folder["id"] @@ -290,7 +290,8 @@ def change_status(): def rm(): req = request.json doc_ids = req["doc_id"] - if isinstance(doc_ids, str): doc_ids = [doc_ids] + if 
isinstance(doc_ids, str): + doc_ids = [doc_ids] for doc_id in doc_ids: if not DocumentService.accessible4deletion(doc_id, current_user.id): diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 9c7c8dcc4f20f8c5e1ac1f0b3749b8ecd623bcc2..5b8ad87d5ae81012e1340429e7939dd95599c00d 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -351,8 +351,10 @@ def list_app(): llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms]) for o in objs: - if not o.api_key: continue - if o.llm_name + "@" + o.llm_factory in llm_set: continue + if not o.api_key: + continue + if o.llm_name + "@" + o.llm_factory in llm_set: + continue llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True}) res = {} diff --git a/api/apps/sdk/agent.py b/api/apps/sdk/agent.py index 0ea4bfa00065c6cb0592005f1f3d86c8627a01a0..79cb5954cf14e9b9819eaad29f943b58dd0c1c23 100644 --- a/api/apps/sdk/agent.py +++ b/api/apps/sdk/agent.py @@ -14,7 +14,7 @@ # limitations under the License. 
# -from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService +from api.db.services.canvas_service import UserCanvasService from api.utils.api_utils import get_error_data_result, token_required from api.utils.api_utils import get_result from flask import request diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 0132cd9944237535abdccdbfb05def965bea903b..2a2bacbe147133ee719c46d7944a98e7ddc9994c 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -41,7 +41,6 @@ from api.utils.api_utils import construct_json_result, get_parser_config from rag.nlp import search from rag.utils import rmSpace from rag.utils.storage_factory import STORAGE_IMPL -import os MAXIMUM_OF_UPLOADING_FILES = 256 @@ -976,12 +975,12 @@ def add_chunk(tenant_id, dataset_id, document_id): if not req.get("content"): return get_error_data_result(message="`content` is required") if "important_keywords" in req: - if type(req["important_keywords"]) != list: + if not isinstance(req["important_keywords"], list): return get_error_data_result( "`important_keywords` is required to be a list" ) if "questions" in req: - if type(req["questions"]) != list: + if not isinstance(req["questions"], list): return get_error_data_result( "`questions` is required to be a list" ) diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index 3d9ea9d51e2d4a563356dc92b4add9303c7827ad..6fc031243266d21c6de38d528f6caf897c83cdf6 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -143,8 +143,10 @@ def completion(tenant_id, chat_id): } conv.message.append(question) for m in conv.message: - if m["role"] == "system": continue - if m["role"] == "assistant" and not msg: continue + if m["role"] == "system": + continue + if m["role"] == "assistant" and not msg: + continue msg.append(m) message_id = msg[-1].get("id") e, dia = DialogService.get_by_id(conv.dialog_id) @@ -267,7 +269,8 @@ def agent_completion(tenant_id, agent_id): if m["role"] == "assistant" and not msg: 
continue msg.append(m) - if not msg[-1].get("id"): msg[-1]["id"] = get_uuid() + if not msg[-1].get("id"): + msg[-1]["id"] = get_uuid() message_id = msg[-1]["id"] stream = req.get("stream", True) @@ -361,7 +364,8 @@ def agent_completion(tenant_id, agent_id): return resp for answer in canvas.run(stream=False): - if answer.get("running_status"): continue + if answer.get("running_status"): + continue final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else "" canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id}) if final_ans.get("reference"): diff --git a/api/apps/user_app.py b/api/apps/user_app.py index cc0506316fb68bd0b5ca77e5d2a5cbc2da0c6540..a9ec7c1209d3234b870e7a8991844dd9cb06e2ae 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -330,7 +330,7 @@ def user_info_from_github(access_token): headers=headers, ).json() user_info["email"] = next( - (email for email in email_info if email["primary"] == True), None + (email for email in email_info if email["primary"]), None )["email"] return user_info diff --git a/api/db/db_models.py b/api/db/db_models.py index bb3c97851eaca376d9e15aaf92bee761eb732df9..0c052ca18a4e87d704bc3053908585135b8f5097 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -130,7 +130,7 @@ def is_continuous_field(cls: typing.Type) -> bool: for p in cls.__bases__: if p in CONTINUOUS_FIELD_TYPE: return True - elif p != Field and p != object: + elif p is not Field and p is not object: if is_continuous_field(p): return True else: diff --git a/api/db/init_data.py b/api/db/init_data.py index f1d468a2ad7e693ad1e741b215365fbd58161ea7..4817b05fdd4d5526399c05438fb2d2ceb8f64229 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -170,7 +170,7 @@ def add_graph_templates(): cnvs = json.load(open(os.path.join(dir, fnm), "r")) try: CanvasTemplateService.save(**cnvs) - except: + except Exception: CanvasTemplateService.update_by_id(cnvs["id"], cnvs) except Exception: 
logging.exception("Add graph templates error: ") diff --git a/api/db/services/__init__.py b/api/db/services/__init__.py index 2363e65df817919802e0c7595f319203eaae0fb3..964a7a17b28cc2a737f097562e6f071c072c1dd6 100644 --- a/api/db/services/__init__.py +++ b/api/db/services/__init__.py @@ -15,13 +15,14 @@ # import pathlib import re -from .user_service import UserService +from .user_service import UserService as UserService def duplicate_name(query_func, **kwargs): fnm = kwargs["name"] objs = query_func(**kwargs) - if not objs: return fnm + if not objs: + return fnm ext = pathlib.Path(fnm).suffix #.jpg nm = re.sub(r"%s$"%ext, "", fnm) r = re.search(r"\(([0-9]+)\)$", nm) @@ -31,8 +32,8 @@ def duplicate_name(query_func, **kwargs): nm = re.sub(r"\([0-9]+\)$", "", nm) c += 1 nm = f"{nm}({c})" - if ext: nm += f"{ext}" + if ext: + nm += f"{ext}" kwargs["name"] = nm return duplicate_name(query_func, **kwargs) - diff --git a/api/db/services/api_service.py b/api/db/services/api_service.py index 864e664329c1aac86998671e7c235c0600a15af0..640a22369355013e5158072bfd252478f6348b58 100644 --- a/api/db/services/api_service.py +++ b/api/db/services/api_service.py @@ -64,7 +64,8 @@ class API4ConversationService(CommonService): @classmethod @DB.connection_context() def stats(cls, tenant_id, from_date, to_date, source=None): - if len(to_date) == 10: to_date += " 23:59:59" + if len(to_date) == 10: + to_date += " 23:59:59" return cls.model.select( cls.model.create_date.truncate("day").alias("dt"), peewee.fn.COUNT( diff --git a/api/db/services/canvas_service.py b/api/db/services/canvas_service.py index 23eb186934f880f7ff2961aad42a49a29107227d..0fac2f24850a39b506f30a3f585737bf43a680c1 100644 --- a/api/db/services/canvas_service.py +++ b/api/db/services/canvas_service.py @@ -13,9 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from datetime import datetime -import peewee -from api.db.db_models import DB, API4Conversation, APIToken, Dialog, CanvasTemplate, UserCanvas +from api.db.db_models import DB, CanvasTemplate, UserCanvas from api.db.services.common_service import CommonService diff --git a/api/db/services/common_service.py b/api/db/services/common_service.py index f0f52930c7fd848a7e8e052c7c2338dad62014aa..dcbe28cda637bb45194f63ab64d7695b53ea4583 100644 --- a/api/db/services/common_service.py +++ b/api/db/services/common_service.py @@ -115,7 +115,7 @@ class CommonService: try: obj = cls.model.query(id=pid)[0] return True, obj - except Exception as e: + except Exception: return False, None @classmethod diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 9e9aa507186e07e6b2c68e8634c7b980edf148b1..1a63b7962235d38621b391b7833fe995f24a8373 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -106,15 +106,15 @@ def message_fit_in(msg, max_length=4000): return c, msg ll = num_tokens_from_string(msg_[0]["content"]) - l = num_tokens_from_string(msg_[-1]["content"]) - if ll / (ll + l) > 0.8: + ll2 = num_tokens_from_string(msg_[-1]["content"]) + if ll / (ll + ll2) > 0.8: m = msg_[0]["content"] - m = encoder.decode(encoder.encode(m)[:max_length - l]) + m = encoder.decode(encoder.encode(m)[:max_length - ll2]) msg[0]["content"] = m return max_length, msg m = msg_[1]["content"] - m = encoder.decode(encoder.encode(m)[:max_length - l]) + m = encoder.decode(encoder.encode(m)[:max_length - ll2]) msg[1]["content"] = m return max_length, msg @@ -257,7 +257,8 @@ def chat(dialog, messages, stream=True, **kwargs): idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) recall_docs = [ d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] - if not recall_docs: recall_docs = kbinfos["doc_aggs"] + if not recall_docs: + recall_docs = kbinfos["doc_aggs"] kbinfos["doc_aggs"] = recall_docs refs = deepcopy(kbinfos) @@ -433,13 +434,15 
@@ def relevant(tenant_id, llm_id, question, contents: list): Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. No other words needed except 'yes' or 'no'. """ - if not contents:return False + if not contents: + return False contents = "Documents: \n" + " - ".join(contents) contents = f"Question: {question}\n" + contents if num_tokens_from_string(contents) >= chat_mdl.max_length - 4: contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4]) ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01}) - if ans.lower().find("yes") >= 0: return True + if ans.lower().find("yes") >= 0: + return True return False @@ -481,8 +484,10 @@ Requirements: ] _, msg = message_fit_in(msg, chat_mdl.max_length) kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) - if isinstance(kwd, tuple): kwd = kwd[0] - if kwd.find("**ERROR**") >=0: return "" + if isinstance(kwd, tuple): + kwd = kwd[0] + if kwd.find("**ERROR**") >=0: + return "" return kwd @@ -508,8 +513,10 @@ Requirements: ] _, msg = message_fit_in(msg, chat_mdl.max_length) kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) - if isinstance(kwd, tuple): kwd = kwd[0] - if kwd.find("**ERROR**") >= 0: return "" + if isinstance(kwd, tuple): + kwd = kwd[0] + if kwd.find("**ERROR**") >= 0: + return "" return kwd @@ -520,7 +527,8 @@ def full_question(tenant_id, llm_id, messages): chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) conv = [] for m in messages: - if m["role"] not in ["user", "assistant"]: continue + if m["role"] not in ["user", "assistant"]: + continue conv.append("{}: {}".format(m["role"].upper(), m["content"])) conv = "\n".join(conv) today = datetime.date.today().isoformat() @@ -581,7 +589,8 @@ Output: What's the weather in Rochester on {tomorrow}? 
def tts(tts_mdl, text): - if not tts_mdl or not text: return + if not tts_mdl or not text: + return bin = b"" for chunk in tts_mdl.tts(text): bin += chunk @@ -641,7 +650,8 @@ def ask(question, kb_ids, tenant_id): idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) recall_docs = [ d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] - if not recall_docs: recall_docs = kbinfos["doc_aggs"] + if not recall_docs: + recall_docs = kbinfos["doc_aggs"] kbinfos["doc_aggs"] = recall_docs refs = deepcopy(kbinfos) for c in refs["chunks"]: diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 9feb69da01b8c1395ab7dcda792e8c732fe7c8d1..aea4931ebf300c2ea61d36c9d27e7957a197c83a 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -532,7 +532,8 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): try: mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output, ensure_ascii=False, indent=2) - if len(mind_map) < 32: raise Exception("Few content: " + mind_map) + if len(mind_map) < 32: + raise Exception("Few content: " + mind_map) cks.append({ "id": get_uuid(), "doc_id": doc_id, diff --git a/api/db/services/file2document_service.py b/api/db/services/file2document_service.py index e04ed190cb73e255456dfb3f8d254e760178140e..f3f587e465440778bafb5ecfec984fb87cfc6c48 100644 --- a/api/db/services/file2document_service.py +++ b/api/db/services/file2document_service.py @@ -20,7 +20,7 @@ from api.db.db_models import DB from api.db.db_models import File, File2Document from api.db.services.common_service import CommonService from api.db.services.document_service import DocumentService -from api.utils import current_timestamp, datetime_format, get_uuid +from api.utils import current_timestamp, datetime_format class File2DocumentService(CommonService): @@ -63,7 +63,7 @@ class File2DocumentService(CommonService): def update_by_file_id(cls, file_id, obj): 
obj["update_time"] = current_timestamp() obj["update_date"] = datetime_format(datetime.now()) - num = cls.model.update(obj).where(cls.model.id == file_id).execute() + cls.model.update(obj).where(cls.model.id == file_id).execute() e, obj = cls.get_by_id(cls.model.id) return obj diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 2602bf3764b4ec36376b71fd322a21270108b756..be225aeeb907a8ae37958b3b6b3bbd9adecb8ca0 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -85,7 +85,8 @@ class FileService(CommonService): .join(Document, on=(File2Document.document_id == Document.id)) .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id)) .where(cls.model.id == file_id)) - if not kbs: return [] + if not kbs: + return [] kbs_info_list = [] for kb in list(kbs.dicts()): kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']}) @@ -304,7 +305,8 @@ class FileService(CommonService): @classmethod @DB.connection_context() def add_file_from_kb(cls, doc, kb_folder_id, tenant_id): - for _ in File2DocumentService.get_by_document_id(doc["id"]): return + for _ in File2DocumentService.get_by_document_id(doc["id"]): + return file = { "id": get_uuid(), "parent_id": kb_folder_id, diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 16fd42ca161869843acaee9ad42c9453306f2b74..2d47a93effd9411237bbc1fe872130889653ad41 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -107,7 +107,8 @@ class TenantLLMService(CommonService): model_config = cls.get_api_key(tenant_id, mdlnm) mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm) - if model_config: model_config = model_config.to_dict() + if model_config: + model_config = model_config.to_dict() if not model_config: if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]: llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid) diff --git 
a/api/db/services/task_service.py b/api/db/services/task_service.py index b55621fe8815b6e5d2d8a9d00086283bb71e6395..424a571ee5731f565df3eeb9c70c39767f13c55a 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -57,28 +57,33 @@ class TaskService(CommonService): Tenant.img2txt_id, Tenant.asr_id, Tenant.llm_id, - cls.model.update_time] - docs = cls.model.select(*fields) \ - .join(Document, on=(cls.model.doc_id == Document.id)) \ - .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \ - .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \ + cls.model.update_time, + ] + docs = ( + cls.model.select(*fields) + .join(Document, on=(cls.model.doc_id == Document.id)) + .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) + .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) .where(cls.model.id == task_id) + ) docs = list(docs.dicts()) - if not docs: return None + if not docs: + return None msg = "\nTask has been received." - prog = random.random() / 10. + prog = random.random() / 10.0 if docs[0]["retry_count"] >= 3: msg = "\nERROR: Task is abandoned after 3 times attempts." 
prog = -1 - cls.model.update(progress_msg=cls.model.progress_msg + msg, - progress=prog, - retry_count=docs[0]["retry_count"]+1 - ).where( - cls.model.id == docs[0]["id"]).execute() + cls.model.update( + progress_msg=cls.model.progress_msg + msg, + progress=prog, + retry_count=docs[0]["retry_count"] + 1, + ).where(cls.model.id == docs[0]["id"]).execute() - if docs[0]["retry_count"] >= 3: return None + if docs[0]["retry_count"] >= 3: + return None return docs[0] @@ -86,21 +91,44 @@ class TaskService(CommonService): @DB.connection_context() def get_ongoing_doc_name(cls): with DB.lock("get_task", -1): - docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \ - .join(Document, on=(cls.model.doc_id == Document.id)) \ - .join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \ - .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \ + docs = ( + cls.model.select( + *[Document.id, Document.kb_id, Document.location, File.parent_id] + ) + .join(Document, on=(cls.model.doc_id == Document.id)) + .join( + File2Document, + on=(File2Document.document_id == Document.id), + join_type=JOIN.LEFT_OUTER, + ) + .join( + File, + on=(File2Document.file_id == File.id), + join_type=JOIN.LEFT_OUTER, + ) .where( Document.status == StatusEnum.VALID.value, Document.run == TaskStatus.RUNNING.value, ~(Document.type == FileType.VIRTUAL.value), cls.model.progress < 1, - cls.model.create_time >= current_timestamp() - 1000 * 600 + cls.model.create_time >= current_timestamp() - 1000 * 600, ) + ) docs = list(docs.dicts()) - if not docs: return [] - - return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs])) + if not docs: + return [] + + return list( + set( + [ + ( + d["parent_id"] if d["parent_id"] else d["kb_id"], + d["location"], + ) + for d in docs + ] + ) + ) @classmethod @DB.connection_context() @@ -118,28 +146,30 @@ class TaskService(CommonService): def 
update_progress(cls, id, info): if os.environ.get("MACOS"): if info["progress_msg"]: - cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( - cls.model.id == id).execute() + cls.model.update( + progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"] + ).where(cls.model.id == id).execute() if "progress" in info: cls.model.update(progress=info["progress"]).where( - cls.model.id == id).execute() + cls.model.id == id + ).execute() return with DB.lock("update_progress", -1): if info["progress_msg"]: - cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( - cls.model.id == id).execute() + cls.model.update( + progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"] + ).where(cls.model.id == id).execute() if "progress" in info: cls.model.update(progress=info["progress"]).where( - cls.model.id == id).execute() + cls.model.id == id + ).execute() def queue_tasks(doc: dict, bucket: str, name: str): def new_task(): - return { - "id": get_uuid(), - "doc_id": doc["id"] - } + return {"id": get_uuid(), "doc_id": doc["id"]} + tsks = [] if doc["type"] == FileType.PDF.value: @@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str): if doc["parser_id"] == "paper": page_size = doc["parser_config"].get("task_page_size", 22) if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout: - page_size = 10 ** 9 - page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)] + page_size = 10**9 + page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)] for s, e in page_ranges: s -= 1 s = max(0, s) @@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str): DocumentService.begin2parse(doc["id"]) for t in tsks: - assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status." + assert REDIS_CONN.queue_product( + SVR_QUEUE_NAME, message=t + ), "Can't access Redis. Please check the Redis' status." 
diff --git a/api/db/services/user_service.py b/api/db/services/user_service.py index 49a1d7f9bad9e7382d077756e6ca963acf4cb195..44cafb87ace9c7e0eca693dc49c125dddf926f21 100644 --- a/api/db/services/user_service.py +++ b/api/db/services/user_service.py @@ -22,7 +22,7 @@ from api.db import UserTenantRole from api.db.db_models import DB, UserTenant from api.db.db_models import User, Tenant from api.db.services.common_service import CommonService -from api.utils import get_uuid, get_format_time, current_timestamp, datetime_format +from api.utils import get_uuid, current_timestamp, datetime_format from api.db import StatusEnum diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 0d7cd6a366723a3ec0e0ea08a36268b213a1289a..713ba1881e68f2347d66d5bb8854fa33644e9ea2 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -21,10 +21,7 @@ import logging import os from api.utils.log_utils import initRootLogger -LOG_LEVELS = os.environ.get("LOG_LEVELS", "") -initRootLogger("ragflow_server", LOG_LEVELS) -import os import signal import sys import time @@ -44,6 +41,9 @@ from api.versions import get_ragflow_version from api.utils import show_configs from rag.settings import print_rag_settings +LOG_LEVELS = os.environ.get("LOG_LEVELS", "") +initRootLogger("ragflow_server", LOG_LEVELS) + def update_progress(): while True: diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index e20b85d29f859a87adc0af1d03cd752916a9b00f..635497d07e70c71fa1786789a92f17344ddf58c7 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -36,7 +36,6 @@ from werkzeug.http import HTTP_STATUS_CODES from api.db.db_models import APIToken from api import settings -from api import settings from api.utils import CustomJSONEncoder, get_uuid from api.utils import json_dumps from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC diff --git a/api/validation.py b/api/validation.py index 39d506a8f2413cd4f342b9fef12198d74b46790f..b552b3375a4a86365844d13e97b1210e92deb9e4 
100644 --- a/api/validation.py +++ b/api/validation.py @@ -45,5 +45,5 @@ try: pool = Pool(processes=1) thread = pool.apply_async(download_nltk_data) binary = thread.get(timeout=60) -except Exception as e: +except Exception: print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True) diff --git a/deepdoc/parser/__init__.py b/deepdoc/parser/__init__.py index 67e5b5a8dcb76d62e2e22494964be805211ff557..2a62a989468b987a227ead60383964af3bb24d7c 100644 --- a/deepdoc/parser/__init__.py +++ b/deepdoc/parser/__init__.py @@ -18,4 +18,16 @@ from .ppt_parser import RAGFlowPptParser as PptParser from .html_parser import RAGFlowHtmlParser as HtmlParser from .json_parser import RAGFlowJsonParser as JsonParser from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser -from .txt_parser import RAGFlowTxtParser as TxtParser \ No newline at end of file +from .txt_parser import RAGFlowTxtParser as TxtParser + +__all__ = [ + "PdfParser", + "PlainParser", + "DocxParser", + "ExcelParser", + "PptParser", + "HtmlParser", + "JsonParser", + "MarkdownParser", + "TxtParser", +] \ No newline at end of file diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 4bb509061245b78d9ba68cadccc2f6dcade02f51..1d23978b70714808f25d93cc8e1d811abc922fae 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -29,7 +29,8 @@ class RAGFlowExcelParser: for sheetname in wb.sheetnames: ws = wb[sheetname] rows = list(ws.rows) - if not rows: continue + if not rows: + continue tb_rows_0 = "" for t in list(rows[0]): @@ -40,7 +41,9 @@ class RAGFlowExcelParser: tb = "" tb += f"" tb += tb_rows_0 - for r in list(rows[1 + chunk_i * chunk_rows:1 + (chunk_i + 1) * chunk_rows]): + for r in list( + rows[1 + chunk_i * chunk_rows : 1 + (chunk_i + 1) * chunk_rows] + ): tb += "" for i, c in enumerate(r): if c.value is None: @@ -62,20 +65,21 @@ class RAGFlowExcelParser: for sheetname in wb.sheetnames: ws = wb[sheetname] rows = list(ws.rows) 
- if not rows:continue + if not rows: + continue ti = list(rows[0]) for r in list(rows[1:]): - l = [] + fields = [] for i, c in enumerate(r): if not c.value: continue t = str(ti[i].value) if i < len(ti) else "" t += (":" if t else "") + str(c.value) - l.append(t) - l = "; ".join(l) + fields.append(t) + line = "; ".join(fields) if sheetname.lower().find("sheet") < 0: - l += " ——" + sheetname - res.append(l) + line += " ——" + sheetname + res.append(line) return res @staticmethod diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py index e02aaa1f3e51210e733ec57b8d0d433366c0345b..973dbbd4644ddb22c873ebd8405da4c9b7eb2637 100644 --- a/deepdoc/parser/html_parser.py +++ b/deepdoc/parser/html_parser.py @@ -36,7 +36,7 @@ class RAGFlowHtmlParser: @classmethod def parser_txt(cls, txt): - if type(txt) != str: + if not isinstance(txt, str): raise TypeError("txt type should be str!") html_doc = readability.Document(txt) title = html_doc.title() diff --git a/deepdoc/parser/json_parser.py b/deepdoc/parser/json_parser.py index 1dd620d44d38822a95a3068199933803cb7dd6d4..08ddc89acf07a094b3d08cd2d6d93f74079a02cb 100644 --- a/deepdoc/parser/json_parser.py +++ b/deepdoc/parser/json_parser.py @@ -22,7 +22,7 @@ class RAGFlowJsonParser: txt = binary.decode(encoding, errors="ignore") json_data = json.loads(txt) chunks = self.split_json(json_data, True) - sections = [json.dumps(l, ensure_ascii=False) for l in chunks if l] + sections = [json.dumps(line, ensure_ascii=False) for line in chunks if line] return sections @staticmethod diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 5ffecde4b9b1ef58fa6ea884aebd15e73b4e3108..82fab32d24e5f30e2d84f964ed792ca843d15626 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -752,7 +752,7 @@ class RAGFlowPdfParser: "x1": np.max([b["x1"] for b in bxs]), "bottom": np.max([b["bottom"] for b in bxs]) - ht } - louts = [l for l in self.page_layout[pn] if l["type"] == ltype] + louts = 
[layout for layout in self.page_layout[pn] if layout["type"] == ltype] ii = Recognizer.find_overlapped(b, louts, naive=True) if ii is not None: b = louts[ii] @@ -763,7 +763,8 @@ class RAGFlowPdfParser: "layoutno", ""))) left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"] - if right < left: right = left + 1 + if right < left: + right = left + 1 poss.append((pn + self.page_from, left, right, top, bott)) return self.page_images[pn] \ .crop((left * ZM, top * ZM, @@ -845,7 +846,8 @@ class RAGFlowPdfParser: top = bx["top"] - self.page_cum_height[pn[0] - 1] bott = bx["bottom"] - self.page_cum_height[pn[0] - 1] page_images_cnt = len(self.page_images) - if pn[-1] - 1 >= page_images_cnt: return "" + if pn[-1] - 1 >= page_images_cnt: + return "" while bott * ZM > self.page_images[pn[-1] - 1].size[1]: bott -= self.page_images[pn[-1] - 1].size[1] / ZM pn.append(pn[-1] + 1) @@ -889,7 +891,6 @@ class RAGFlowPdfParser: nonlocal mh, pw, lines, widths lines.append(line) widths.append(width(line)) - width_mean = np.mean(widths) mmj = self.proj_match( line["text"]) or line.get( "layout_type", @@ -994,7 +995,7 @@ class RAGFlowPdfParser: else: self.is_english = False - st = timer() + # st = timer() for i, img in enumerate(self.page_images_x2): chars = self.page_chars[i] if not self.is_english else [] self.mean_height.append( @@ -1028,8 +1029,8 @@ class RAGFlowPdfParser: self.page_cum_height = np.cumsum(self.page_cum_height) assert len(self.page_cum_height) == len(self.page_images) + 1 - if len(self.boxes) == 0 and zoomin < 9: self.__images__(fnm, zoomin * 3, page_from, - page_to, callback) + if len(self.boxes) == 0 and zoomin < 9: + self.__images__(fnm, zoomin * 3, page_from, page_to, callback) def __call__(self, fnm, need_image=True, zoomin=3, return_html=False): self.__images__(fnm, zoomin) @@ -1168,7 +1169,7 @@ class PlainParser(object): if not self.outlines: logging.warning("Miss outlines") - return [(l, "") for l in lines], [] + return [(line, "") for line in lines], 
[] def crop(self, ck, need_position): raise NotImplementedError diff --git a/deepdoc/parser/resume/__init__.py b/deepdoc/parser/resume/__init__.py index fab6f7e716eb0d5ac2aebda5690c6f704f85da74..1038bf2b7b128a89cff113f3b75fea4b35a8d89f 100644 --- a/deepdoc/parser/resume/__init__.py +++ b/deepdoc/parser/resume/__init__.py @@ -15,21 +15,42 @@ import datetime def refactor(cv): - for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]: - if n in cv and cv[n] is not None: del cv[n] + for n in [ + "raw_txt", + "parser_name", + "inference", + "ori_text", + "use_time", + "time_stat", + ]: + if n in cv and cv[n] is not None: + del cv[n] cv["is_deleted"] = 0 - if "basic" not in cv: cv["basic"] = {} - if cv["basic"].get("photo2"): del cv["basic"]["photo2"] + if "basic" not in cv: + cv["basic"] = {} + if cv["basic"].get("photo2"): + del cv["basic"]["photo2"] - for n in ["education", "work", "certificate", "project", "language", "skill", "training"]: - if n not in cv or cv[n] is None: continue - if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()] - if type(cv[n]) != type([]): + for n in [ + "education", + "work", + "certificate", + "project", + "language", + "skill", + "training", + ]: + if n not in cv or cv[n] is None: + continue + if isinstance(cv[n], dict): + cv[n] = [v for _, v in cv[n].items()] + if not isinstance(cv[n], list): del cv[n] continue vv = [] for v in cv[n]: - if "external" in v and v["external"] is not None: del v["external"] + if "external" in v and v["external"] is not None: + del v["external"] vv.append(v) cv[n] = {str(i): vv[i] for i in range(len(vv))} @@ -42,24 +63,44 @@ def refactor(cv): cv["basic"][t] = cv["basic"][n] del cv["basic"][n] - work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", "")) - edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", "")) + work = sorted( + [v for _, v in cv.get("work", {}).items()], + key=lambda 
x: x.get("start_time", ""), + ) + edu = sorted( + [v for _, v in cv.get("education", {}).items()], + key=lambda x: x.get("start_time", ""), + ) if work: cv["basic"]["work_start_time"] = work[0].get("start_time", "") - cv["basic"]["management_experience"] = 'Y' if any( - [w.get("management_experience", '') == 'Y' for w in work]) else 'N' + cv["basic"]["management_experience"] = ( + "Y" + if any([w.get("management_experience", "") == "Y" for w in work]) + else "N" + ) cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0") - for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities", - "corporation_type", "scale", "corporation_name"]: + for n in [ + "annual_salary_from", + "annual_salary_to", + "industry_name", + "position_name", + "responsibilities", + "corporation_type", + "scale", + "corporation_name", + ]: cv["basic"][n] = work[-1].get(n, "") if edu: for n in ["school_name", "discipline_name"]: - if n in edu[-1]: cv["basic"][n] = edu[-1][n] + if n in edu[-1]: + cv["basic"][n] = edu[-1][n] cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - if "contact" not in cv: cv["contact"] = {} - if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "") - return cv \ No newline at end of file + if "contact" not in cv: + cv["contact"] = {} + if not cv["contact"].get("name"): + cv["contact"]["name"] = cv["basic"].get("name", "") + return cv diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py index 142b0f5e492a2d941af8b52918f661641929a90b..6d0b293de4a911b9d311ac3ab945947f8abf803c 100644 --- a/deepdoc/parser/resume/entities/corporations.py +++ b/deepdoc/parser/resume/entities/corporations.py @@ -21,13 +21,18 @@ from . 
import regions current_file_path = os.path.dirname(os.path.abspath(__file__)) -GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0) +GOODS = pd.read_csv( + os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0 +).fillna(0) GOODS["cid"] = GOODS["cid"].astype(str) GOODS = GOODS.set_index(["cid"]) -CORP_TKS = json.load(open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r")) +CORP_TKS = json.load( + open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r") +) GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r")) CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r")) + def baike(cid, default_v=0): global GOODS try: @@ -39,27 +44,41 @@ def baike(cid, default_v=0): def corpNorm(nm, add_region=True): global CORP_TKS - if not nm or type(nm)!=type(""):return "" + if not nm or not isinstance(nm, str): + return "" nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower() nm = re.sub(r"&", "&", nm) nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm) - nm = re.sub(r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE) - nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE) - if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm + nm = re.sub( + r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE + ) + nm = re.sub( + r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", + "", + nm, + 10000, + re.IGNORECASE, + ) + if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])): + return nm tks = rag_tokenizer.tokenize(nm).split() - reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)] + reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)] nm = "" for t in tks: - if regions.isName(t) or t in CORP_TKS:continue - if re.match(r"[0-9a-zA-Z\\,.]+", t) and 
re.match(r".*[0-9a-zA-Z\,.]+$", nm):nm += " " + if regions.isName(t) or t in CORP_TKS: + continue + if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm): + nm += " " nm += t r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip()) - if r:nm = r.group(1) + if r: + nm = r.group(1) r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip()) - if r:nm = r.group(1) - return nm.strip() + (("" if not reg else "(%s)"%reg[0]) if add_region else "") + if r: + nm = r.group(1) + return nm.strip() + (("" if not reg else "(%s)" % reg[0]) if add_region else "") def rmNoise(n): @@ -67,33 +86,40 @@ def rmNoise(n): n = re.sub(r"[,. &()()]+", "", n) return n + GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP]) -for c,v in CORP_TAG.items(): +for c, v in CORP_TAG.items(): cc = corpNorm(rmNoise(c), False) if not cc: logging.debug(c) -CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()} +CORP_TAG = {corpNorm(rmNoise(c), False): v for c, v in CORP_TAG.items()} + def is_good(nm): global GOOD_CORP - if nm.find("外派")>=0:return False + if nm.find("外派") >= 0: + return False nm = rmNoise(nm) nm = corpNorm(nm, False) for n in GOOD_CORP: if re.match(r"[0-9a-zA-Z]+$", n): - if n == nm: return True - elif nm.find(n)>=0:return True + if n == nm: + return True + elif nm.find(n) >= 0: + return True return False + def corp_tag(nm): global CORP_TAG nm = rmNoise(nm) nm = corpNorm(nm, False) for n in CORP_TAG.keys(): if re.match(r"[0-9a-zA-Z., ]+$", n): - if n == nm: return CORP_TAG[n] - elif nm.find(n)>=0: - if len(n)<3 and len(nm)/len(n)>=2:continue + if n == nm: + return CORP_TAG[n] + elif nm.find(n) >= 0: + if len(n) < 3 and len(nm) / len(n) >= 2: + continue return CORP_TAG[n] return [] - diff --git a/deepdoc/parser/resume/entities/degrees.py b/deepdoc/parser/resume/entities/degrees.py index dc2d5bc170cd171950a9ba8d3e77e88b94af24ce..47a90b58211a7ad1aea20ac0f4cf6c58145b8c22 100644 --- a/deepdoc/parser/resume/entities/degrees.py +++ 
b/deepdoc/parser/resume/entities/degrees.py @@ -11,27 +11,31 @@ # limitations under the License. # -TBL = {"94":"EMBA", -"6":"MBA", -"95":"MPA", -"92":"专升本", -"4":"专科", -"90":"中专", -"91":"中技", -"86":"初中", -"3":"博士", -"10":"博士后", -"1":"本科", -"2":"硕士", -"87":"职高", -"89":"高中" +TBL = { + "94": "EMBA", + "6": "MBA", + "95": "MPA", + "92": "专升本", + "4": "专科", + "90": "中专", + "91": "中技", + "86": "初中", + "3": "博士", + "10": "博士后", + "1": "本科", + "2": "硕士", + "87": "职高", + "89": "高中", } -TBL_ = {v:k for k,v in TBL.items()} +TBL_ = {v: k for k, v in TBL.items()} + def get_name(id): return TBL.get(str(id), "") + def get_id(nm): - if not nm:return "" + if not nm: + return "" return TBL_.get(nm.upper().strip(), "") diff --git a/deepdoc/parser/resume/entities/industries.py b/deepdoc/parser/resume/entities/industries.py index 9eeb10e55f5728125b281037cae0917d0c3a9c2e..4768ceb50e8995b6c2e69bf011a57663ac41445b 100644 --- a/deepdoc/parser/resume/entities/industries.py +++ b/deepdoc/parser/resume/entities/industries.py @@ -11,694 +11,699 @@ # limitations under the License. 
# -TBL = {"1":{"name":"IT/通信/电子","parent":"0"}, -"2":{"name":"互联网","parent":"0"}, -"3":{"name":"电子商务","parent":"2"}, -"4":{"name":"互联网金融","parent":"2"}, -"5":{"name":"网络游戏","parent":"2"}, -"6":{"name":"社交网络平台","parent":"2"}, -"7":{"name":"视频音乐","parent":"2"}, -"9":{"name":"安全","parent":"2"}, -"10":{"name":"云计算","parent":"2"}, -"12":{"name":"工具类客户端应用","parent":"2"}, -"13":{"name":"互联网广告","parent":"2"}, -"14":{"name":"企业互联网服务","parent":"2"}, -"16":{"name":"在线教育","parent":"2"}, -"17":{"name":"在线医疗","parent":"2"}, -"19":{"name":"B2B","parent":"3"}, -"20":{"name":"B2C","parent":"3"}, -"21":{"name":"C2C","parent":"3"}, -"22":{"name":"生活信息本地化","parent":"3"}, -"23":{"name":"在线旅游","parent":"2"}, -"24":{"name":"第三方支付","parent":"4"}, -"26":{"name":"客户端游戏","parent":"5"}, -"27":{"name":"网页游戏","parent":"5"}, -"28":{"name":"手机游戏","parent":"5"}, -"29":{"name":"微博","parent":"6"}, -"30":{"name":"社交网站","parent":"6"}, -"31":{"name":"在线视频","parent":"7"}, -"32":{"name":"在线音乐","parent":"7"}, -"35":{"name":"企业安全","parent":"9"}, -"36":{"name":"个人安全","parent":"9"}, -"37":{"name":"企业级云服务","parent":"10"}, -"38":{"name":"个人级云服务","parent":"10"}, -"43":{"name":"输入法","parent":"12"}, -"44":{"name":"浏览器","parent":"12"}, -"45":{"name":"词典","parent":"12"}, -"46":{"name":"播放器","parent":"12"}, -"47":{"name":"下载器","parent":"12"}, -"48":{"name":"IM","parent":"12"}, -"49":{"name":"广告服务","parent":"13"}, -"50":{"name":"第三方广告网络平台","parent":"13"}, -"51":{"name":"媒体代理","parent":"13"}, -"52":{"name":"创意代理","parent":"13"}, -"53":{"name":"IT-综合","parent":"1"}, -"71":{"name":"团购","parent":"3"}, -"72":{"name":"地图","parent":"2"}, -"73":{"name":"数据存储","parent":"2"}, -"414":{"name":"计算机软件","parent":"1"}, -"415":{"name":"计算机硬件","parent":"1"}, -"416":{"name":"计算机服务(系统、数据服务、维修)","parent":"1"}, -"417":{"name":"通信/电信/网络设备","parent":"1"}, -"418":{"name":"通信/电信运营、增值服务","parent":"1"}, -"419":{"name":"电子技术/半导体/集成电路","parent":"1"}, -"472":{"name":"P2P网贷","parent":"4"}, -"473":{"name":"互联网理财","parent":"4"}, 
-"474":{"name":"婚恋","parent":"6"}, -"476":{"name":"虚拟化","parent":"10"}, -"477":{"name":"邮箱","parent":"12"}, -"478":{"name":"商业智能","parent":"14"}, -"479":{"name":"企业建站","parent":"14"}, -"480":{"name":"安防","parent":"14"}, -"481":{"name":"网络营销","parent":"2"}, -"487":{"name":"智能终端","parent":"2"}, -"488":{"name":"移动互联网","parent":"2"}, -"489":{"name":"数字城市","parent":"2"}, -"490":{"name":"大数据","parent":"2"}, -"491":{"name":"互联网人力资源","parent":"2"}, -"492":{"name":"舆情监控","parent":"2"}, -"493":{"name":"移动营销","parent":"481"}, -"494":{"name":"微博营销","parent":"481"}, -"495":{"name":"精准营销","parent":"481"}, -"496":{"name":"海外营销","parent":"481"}, -"497":{"name":"微信营销","parent":"481"}, -"498":{"name":"智能手机","parent":"487"}, -"499":{"name":"可穿戴设备","parent":"487"}, -"500":{"name":"智能电视","parent":"487"}, -"501":{"name":"WAP","parent":"488"}, -"502":{"name":"物联网","parent":"489"}, -"503":{"name":"O2O","parent":"489"}, -"504":{"name":"数字出版","parent":"489"}, -"505":{"name":"搜索","parent":"2"}, -"506":{"name":"垂直搜索","parent":"505"}, -"507":{"name":"无线搜索","parent":"505"}, -"508":{"name":"网页搜索","parent":"505"}, -"509":{"name":"网址导航","parent":"2"}, -"510":{"name":"门户","parent":"2"}, -"511":{"name":"网络文学","parent":"2"}, -"512":{"name":"自媒体","parent":"2"}, -"513":{"name":"金融","parent":"0"}, -"514":{"name":"建筑与房地产","parent":"0"}, -"515":{"name":"专业服务","parent":"0"}, -"516":{"name":"教育培训","parent":"0"}, -"517":{"name":"文化传媒","parent":"0"}, -"518":{"name":"消费品","parent":"0"}, -"519":{"name":"工业","parent":"0"}, -"520":{"name":"交通物流","parent":"0"}, -"521":{"name":"贸易","parent":"0"}, -"522":{"name":"医药","parent":"0"}, -"523":{"name":"医疗器械","parent":"522"}, -"524":{"name":"保健品","parent":"518"}, -"525":{"name":"服务业","parent":"0"}, -"526":{"name":"能源/矿产/环保","parent":"0"}, -"527":{"name":"化工","parent":"0"}, -"528":{"name":"政府","parent":"0"}, -"529":{"name":"公共事业","parent":"0"}, -"530":{"name":"非盈利机构","parent":"0"}, -"531":{"name":"农业","parent":"1131"}, -"532":{"name":"林业","parent":"1131"}, 
-"533":{"name":"畜牧业","parent":"1131"}, -"534":{"name":"渔业","parent":"1131"}, -"535":{"name":"学术科研","parent":"0"}, -"536":{"name":"零售","parent":"0"}, -"537":{"name":"银行","parent":"513"}, -"538":{"name":"保险","parent":"513"}, -"539":{"name":"证券","parent":"513"}, -"540":{"name":"基金","parent":"513"}, -"541":{"name":"信托","parent":"513"}, -"542":{"name":"担保","parent":"513"}, -"543":{"name":"典当","parent":"513"}, -"544":{"name":"拍卖","parent":"513"}, -"545":{"name":"投资/融资","parent":"513"}, -"546":{"name":"期货","parent":"513"}, -"547":{"name":"房地产开发","parent":"514"}, -"548":{"name":"工程施工","parent":"514"}, -"549":{"name":"建筑设计","parent":"514"}, -"550":{"name":"房地产代理","parent":"514"}, -"551":{"name":"物业管理","parent":"514"}, -"552":{"name":"室内设计","parent":"514"}, -"553":{"name":"装修装潢","parent":"514"}, -"554":{"name":"市政工程","parent":"514"}, -"555":{"name":"工程造价","parent":"514"}, -"556":{"name":"工程监理","parent":"514"}, -"557":{"name":"环境工程","parent":"514"}, -"558":{"name":"园林景观","parent":"514"}, -"559":{"name":"法律","parent":"515"}, -"560":{"name":"人力资源","parent":"515"}, -"561":{"name":"会计","parent":"1125"}, -"562":{"name":"审计","parent":"515"}, -"563":{"name":"检测认证","parent":"515"}, -"565":{"name":"翻译","parent":"515"}, -"566":{"name":"中介","parent":"515"}, -"567":{"name":"咨询","parent":"515"}, -"568":{"name":"外包服务","parent":"515"}, -"569":{"name":"家教","parent":"516"}, -"570":{"name":"早教","parent":"516"}, -"571":{"name":"职业技能培训","parent":"516"}, -"572":{"name":"外语培训","parent":"516"}, -"573":{"name":"设计培训","parent":"516"}, -"574":{"name":"IT培训","parent":"516"}, -"575":{"name":"文艺体育培训","parent":"516"}, -"576":{"name":"学历教育","parent":"516"}, -"577":{"name":"管理培训","parent":"516"}, -"578":{"name":"民办基础教育","parent":"516"}, -"579":{"name":"广告","parent":"517"}, -"580":{"name":"媒体","parent":"517"}, -"581":{"name":"会展","parent":"517"}, -"582":{"name":"公关","parent":"517"}, -"583":{"name":"影视","parent":"517"}, -"584":{"name":"艺术","parent":"517"}, -"585":{"name":"文化传播","parent":"517"}, 
-"586":{"name":"娱乐","parent":"517"}, -"587":{"name":"体育","parent":"517"}, -"588":{"name":"出版","parent":"517"}, -"589":{"name":"休闲","parent":"517"}, -"590":{"name":"动漫","parent":"517"}, -"591":{"name":"市场推广","parent":"517"}, -"592":{"name":"市场研究","parent":"517"}, -"593":{"name":"食品","parent":"1129"}, -"594":{"name":"饮料","parent":"1129"}, -"595":{"name":"烟草","parent":"1129"}, -"596":{"name":"酒品","parent":"518"}, -"597":{"name":"服饰","parent":"518"}, -"598":{"name":"纺织","parent":"518"}, -"599":{"name":"化妆品","parent":"1129"}, -"600":{"name":"日用品","parent":"1129"}, -"601":{"name":"家电","parent":"518"}, -"602":{"name":"家具","parent":"518"}, -"603":{"name":"办公用品","parent":"518"}, -"604":{"name":"奢侈品","parent":"518"}, -"605":{"name":"珠宝","parent":"518"}, -"606":{"name":"数码产品","parent":"518"}, -"607":{"name":"玩具","parent":"518"}, -"608":{"name":"图书","parent":"518"}, -"609":{"name":"音像","parent":"518"}, -"610":{"name":"钟表","parent":"518"}, -"611":{"name":"箱包","parent":"518"}, -"612":{"name":"母婴","parent":"518"}, -"613":{"name":"营养保健","parent":"518"}, -"614":{"name":"户外用品","parent":"518"}, -"615":{"name":"健身器材","parent":"518"}, -"616":{"name":"乐器","parent":"518"}, -"617":{"name":"汽车用品","parent":"518"}, -"619":{"name":"厨具","parent":"518"}, -"620":{"name":"机械制造","parent":"519"}, -"621":{"name":"流体控制","parent":"519"}, -"622":{"name":"自动化控制","parent":"519"}, -"623":{"name":"仪器仪表","parent":"519"}, -"624":{"name":"航空/航天","parent":"519"}, -"625":{"name":"交通设施","parent":"519"}, -"626":{"name":"工业电子","parent":"519"}, -"627":{"name":"建材","parent":"519"}, -"628":{"name":"五金材料","parent":"519"}, -"629":{"name":"汽车","parent":"519"}, -"630":{"name":"印刷","parent":"519"}, -"631":{"name":"造纸","parent":"519"}, -"632":{"name":"包装","parent":"519"}, -"633":{"name":"原材料及加工","parent":"519"}, -"634":{"name":"物流","parent":"520"}, -"635":{"name":"仓储","parent":"520"}, -"636":{"name":"客运","parent":"520"}, -"637":{"name":"快递","parent":"520"}, -"638":{"name":"化学药","parent":"522"}, 
-"639":{"name":"中药","parent":"522"}, -"640":{"name":"生物制药","parent":"522"}, -"641":{"name":"兽药","parent":"522"}, -"642":{"name":"农药","parent":"522"}, -"643":{"name":"CRO","parent":"522"}, -"644":{"name":"消毒","parent":"522"}, -"645":{"name":"医药商业","parent":"522"}, -"646":{"name":"医疗服务","parent":"522"}, -"647":{"name":"医疗器械","parent":"523"}, -"648":{"name":"制药设备","parent":"523"}, -"649":{"name":"医用耗材","parent":"523"}, -"650":{"name":"手术器械","parent":"523"}, -"651":{"name":"保健器材","parent":"524"}, -"652":{"name":"性保健品","parent":"524"}, -"653":{"name":"医药保养","parent":"524"}, -"654":{"name":"医用保健","parent":"524"}, -"655":{"name":"酒店","parent":"525"}, -"656":{"name":"餐饮","parent":"525"}, -"657":{"name":"旅游","parent":"525"}, -"658":{"name":"生活服务","parent":"525"}, -"659":{"name":"保健服务","parent":"525"}, -"660":{"name":"运动健身","parent":"525"}, -"661":{"name":"家政服务","parent":"525"}, -"662":{"name":"婚庆服务","parent":"525"}, -"663":{"name":"租赁服务","parent":"525"}, -"664":{"name":"维修服务","parent":"525"}, -"665":{"name":"石油天然气","parent":"526"}, -"666":{"name":"电力","parent":"526"}, -"667":{"name":"新能源","parent":"526"}, -"668":{"name":"水利","parent":"526"}, -"669":{"name":"矿产","parent":"526"}, -"670":{"name":"采掘业","parent":"526"}, -"671":{"name":"冶炼","parent":"526"}, -"672":{"name":"环保","parent":"526"}, -"673":{"name":"无机化工原料","parent":"527"}, -"674":{"name":"有机化工原料","parent":"527"}, -"675":{"name":"精细化学品","parent":"527"}, -"676":{"name":"化工设备","parent":"527"}, -"677":{"name":"化工工程","parent":"527"}, -"678":{"name":"资产管理","parent":"513"}, -"679":{"name":"金融租赁","parent":"513"}, -"680":{"name":"征信及信评机构","parent":"513"}, -"681":{"name":"资产评估机构","parent":"513"}, -"683":{"name":"金融监管机构","parent":"513"}, -"684":{"name":"国际贸易","parent":"521"}, -"685":{"name":"海关","parent":"521"}, -"686":{"name":"购物中心","parent":"536"}, -"687":{"name":"超市","parent":"536"}, -"688":{"name":"便利店","parent":"536"}, -"689":{"name":"专卖店","parent":"536"}, -"690":{"name":"专业店","parent":"536"}, 
-"691":{"name":"百货店","parent":"536"}, -"692":{"name":"杂货店","parent":"536"}, -"693":{"name":"个人银行","parent":"537"}, -"695":{"name":"私人银行","parent":"537"}, -"696":{"name":"公司银行","parent":"537"}, -"697":{"name":"投资银行","parent":"537"}, -"698":{"name":"政策性银行","parent":"537"}, -"699":{"name":"中央银行","parent":"537"}, -"700":{"name":"人寿险","parent":"538"}, -"701":{"name":"财产险","parent":"538"}, -"702":{"name":"再保险","parent":"538"}, -"703":{"name":"养老险","parent":"538"}, -"704":{"name":"保险代理公司","parent":"538"}, -"705":{"name":"公募基金","parent":"540"}, -"707":{"name":"私募基金","parent":"540"}, -"708":{"name":"第三方理财","parent":"679"}, -"709":{"name":"资产管理公司","parent":"679"}, -"711":{"name":"房产中介","parent":"566"}, -"712":{"name":"职业中介","parent":"566"}, -"713":{"name":"婚姻中介","parent":"566"}, -"714":{"name":"战略咨询","parent":"567"}, -"715":{"name":"投资咨询","parent":"567"}, -"716":{"name":"心理咨询","parent":"567"}, -"717":{"name":"留学移民咨询","parent":"567"}, -"718":{"name":"工商注册代理","parent":"568"}, -"719":{"name":"商标专利代理","parent":"568"}, -"720":{"name":"财务代理","parent":"568"}, -"721":{"name":"工程机械","parent":"620"}, -"722":{"name":"农业机械","parent":"620"}, -"723":{"name":"海工设备","parent":"620"}, -"724":{"name":"包装机械","parent":"620"}, -"725":{"name":"印刷机械","parent":"620"}, -"726":{"name":"数控机床","parent":"620"}, -"727":{"name":"矿山机械","parent":"620"}, -"728":{"name":"水泵","parent":"621"}, -"729":{"name":"管道","parent":"621"}, -"730":{"name":"阀门","parent":"621"}, -"732":{"name":"压缩机","parent":"621"}, -"733":{"name":"集散控制系统","parent":"622"}, -"734":{"name":"远程控制","parent":"622"}, -"735":{"name":"液压系统","parent":"622"}, -"736":{"name":"楼宇智能化","parent":"622"}, -"737":{"name":"飞机制造","parent":"624"}, -"738":{"name":"航空公司","parent":"624"}, -"739":{"name":"发动机","parent":"624"}, -"740":{"name":"复合材料","parent":"624"}, -"741":{"name":"高铁","parent":"625"}, -"742":{"name":"地铁","parent":"625"}, -"743":{"name":"信号传输","parent":"625"}, -"745":{"name":"结构材料","parent":"627"}, -"746":{"name":"装饰材料","parent":"627"}, 
-"747":{"name":"专用材料","parent":"627"}, -"749":{"name":"经销商集团","parent":"629"}, -"750":{"name":"整车制造","parent":"629"}, -"751":{"name":"汽车零配件","parent":"629"}, -"752":{"name":"外型设计","parent":"629"}, -"753":{"name":"平版印刷","parent":"630"}, -"754":{"name":"凸版印刷","parent":"630"}, -"755":{"name":"凹版印刷","parent":"630"}, -"756":{"name":"孔版印刷","parent":"630"}, -"757":{"name":"印刷用纸","parent":"631"}, -"758":{"name":"书写、制图及复制用纸","parent":"631"}, -"759":{"name":"包装用纸","parent":"631"}, -"760":{"name":"生活、卫生及装饰用纸","parent":"631"}, -"761":{"name":"技术用纸","parent":"631"}, -"762":{"name":"加工纸原纸","parent":"631"}, -"763":{"name":"食品包装","parent":"632"}, -"764":{"name":"医药包装","parent":"632"}, -"765":{"name":"日化包装","parent":"632"}, -"766":{"name":"物流包装","parent":"632"}, -"767":{"name":"礼品包装","parent":"632"}, -"768":{"name":"电子五金包装","parent":"632"}, -"769":{"name":"汽车服务","parent":"525"}, -"770":{"name":"汽车保养","parent":"769"}, -"771":{"name":"租车","parent":"769"}, -"773":{"name":"出租车","parent":"769"}, -"774":{"name":"代驾","parent":"769"}, -"775":{"name":"发电","parent":"666"}, -"777":{"name":"输配电","parent":"666"}, -"779":{"name":"风电","parent":"667"}, -"780":{"name":"光伏/太阳能","parent":"667"}, -"781":{"name":"生物质发电","parent":"667"}, -"782":{"name":"煤化工","parent":"667"}, -"783":{"name":"垃圾发电","parent":"667"}, -"784":{"name":"核电","parent":"667"}, -"785":{"name":"能源矿产","parent":"669"}, -"786":{"name":"金属矿产","parent":"669"}, -"787":{"name":"非金属矿产","parent":"669"}, -"788":{"name":"水气矿产","parent":"669"}, -"789":{"name":"锅炉","parent":"775"}, -"790":{"name":"发电机","parent":"775"}, -"791":{"name":"汽轮机","parent":"775"}, -"792":{"name":"燃机","parent":"775"}, -"793":{"name":"冷却","parent":"775"}, -"794":{"name":"电力设计院","parent":"775"}, -"795":{"name":"高压输配电","parent":"777"}, -"796":{"name":"中压输配电","parent":"777"}, -"797":{"name":"低压输配电","parent":"777"}, -"798":{"name":"继电保护","parent":"777"}, -"799":{"name":"智能电网","parent":"777"}, -"800":{"name":"小学","parent":"516"}, -"801":{"name":"电动车","parent":"519"}, 
-"802":{"name":"皮具箱包","parent":"518"}, -"803":{"name":"医药制造","parent":"522"}, -"804":{"name":"电器销售","parent":"536"}, -"805":{"name":"塑料制品","parent":"527"}, -"806":{"name":"公益基金会","parent":"530"}, -"807":{"name":"美发服务","parent":"525"}, -"808":{"name":"农业养殖","parent":"531"}, -"809":{"name":"金融服务","parent":"513"}, -"810":{"name":"商业地产综合体","parent":"514"}, -"811":{"name":"美容服务","parent":"525"}, -"812":{"name":"灯饰","parent":"518"}, -"813":{"name":"油墨颜料产品","parent":"527"}, -"814":{"name":"眼镜制造","parent":"518"}, -"815":{"name":"农业生物技术","parent":"531"}, -"816":{"name":"体育用品","parent":"518"}, -"817":{"name":"保健用品","parent":"524"}, -"818":{"name":"化学化工产品","parent":"527"}, -"819":{"name":"饲料","parent":"531"}, -"821":{"name":"保安服务","parent":"525"}, -"822":{"name":"干细胞技术","parent":"522"}, -"824":{"name":"农药化肥","parent":"527"}, -"825":{"name":"卫生洁具","parent":"518"}, -"826":{"name":"体育器材、场馆","parent":"518"}, -"827":{"name":"饲料加工","parent":"531"}, -"828":{"name":"测绘服务","parent":"529"}, -"830":{"name":"金属船舶制造","parent":"519"}, -"831":{"name":"基因工程","parent":"522"}, -"832":{"name":"花卉服务","parent":"536"}, -"833":{"name":"农业种植","parent":"531"}, -"834":{"name":"皮革制品","parent":"518"}, -"835":{"name":"地理信息加工服务","parent":"529"}, -"836":{"name":"机器人","parent":"519"}, -"837":{"name":"礼品","parent":"518"}, -"838":{"name":"理发及美容服务","parent":"525"}, -"839":{"name":"其他清洁服务","parent":"525"}, -"840":{"name":"硅胶材料","parent":"527"}, -"841":{"name":"茶叶销售","parent":"518"}, -"842":{"name":"彩票活动","parent":"529"}, -"843":{"name":"化妆培训","parent":"516"}, -"844":{"name":"鞋业","parent":"518"}, -"845":{"name":"酒店用品","parent":"518"}, -"846":{"name":"复合材料","parent":"527"}, -"847":{"name":"房地产工程建设","parent":"548"}, -"848":{"name":"知识产权服务","parent":"559"}, -"849":{"name":"新型建材","parent":"627"}, -"850":{"name":"企业投资咨询","parent":"567"}, -"851":{"name":"含乳饮料和植物蛋白饮料制造","parent":"594"}, -"852":{"name":"汽车检测设备","parent":"629"}, -"853":{"name":"手机通讯器材","parent":"417"}, -"854":{"name":"环保材料","parent":"672"}, 
-"855":{"name":"交通设施","parent":"554"}, -"856":{"name":"电子器件","parent":"419"}, -"857":{"name":"啤酒","parent":"594"}, -"858":{"name":"生态旅游","parent":"657"}, -"859":{"name":"自动化设备","parent":"626"}, -"860":{"name":"软件开发","parent":"414"}, -"861":{"name":"葡萄酒销售","parent":"594"}, -"862":{"name":"钢材","parent":"633"}, -"863":{"name":"餐饮培训","parent":"656"}, -"864":{"name":"速冻食品","parent":"593"}, -"865":{"name":"空气环保","parent":"672"}, -"866":{"name":"互联网房地产经纪服务","parent":"550"}, -"867":{"name":"食品添加剂","parent":"593"}, -"868":{"name":"演艺传播","parent":"585"}, -"869":{"name":"信用卡","parent":"537"}, -"870":{"name":"报纸期刊广告","parent":"579"}, -"871":{"name":"摄影","parent":"525"}, -"872":{"name":"手机软件","parent":"414"}, -"873":{"name":"地坪建材","parent":"627"}, -"874":{"name":"企业管理咨询","parent":"567"}, -"875":{"name":"幼儿教育","parent":"570"}, -"876":{"name":"系统集成","parent":"416"}, -"877":{"name":"皮革服饰","parent":"597"}, -"878":{"name":"保健食品","parent":"593"}, -"879":{"name":"叉车","parent":"620"}, -"880":{"name":"厨卫电器","parent":"601"}, -"882":{"name":"地暖设备","parent":"627"}, -"883":{"name":"钢结构制造","parent":"548"}, -"884":{"name":"投影机","parent":"606"}, -"885":{"name":"啤酒销售","parent":"594"}, -"886":{"name":"度假村旅游","parent":"657"}, -"887":{"name":"电力元件设备","parent":"626"}, -"888":{"name":"管理软件","parent":"414"}, -"889":{"name":"轴承","parent":"628"}, -"890":{"name":"餐饮设备","parent":"656"}, -"891":{"name":"肉制品及副产品加工","parent":"593"}, -"892":{"name":"艺术收藏品投资交易","parent":"584"}, -"893":{"name":"净水器","parent":"601"}, -"894":{"name":"进口食品","parent":"593"}, -"895":{"name":"娱乐文化传播","parent":"585"}, -"896":{"name":"文化传播","parent":"585"}, -"897":{"name":"商旅传媒","parent":"580"}, -"898":{"name":"广告设计制作","parent":"579"}, -"899":{"name":"金属丝绳及其制品制造","parent":"627"}, -"900":{"name":"建筑涂料","parent":"627"}, -"901":{"name":"抵押贷款","parent":"543"}, -"902":{"name":"早教","parent":"570"}, -"903":{"name":"电影放映","parent":"583"}, -"904":{"name":"内衣服饰","parent":"597"}, -"905":{"name":"无线网络通信","parent":"418"}, 
-"906":{"name":"记忆卡","parent":"415"}, -"907":{"name":"女装服饰","parent":"597"}, -"908":{"name":"建筑机械","parent":"620"}, -"909":{"name":"制冷电器","parent":"601"}, -"910":{"name":"通信设备","parent":"417"}, -"911":{"name":"空调设备","parent":"601"}, -"912":{"name":"建筑装饰","parent":"553"}, -"913":{"name":"办公设备","parent":"603"}, -"916":{"name":"数据处理软件","parent":"414"}, -"917":{"name":"葡萄酒贸易","parent":"594"}, -"918":{"name":"通讯器材","parent":"417"}, -"919":{"name":"铜业","parent":"633"}, -"920":{"name":"食堂","parent":"656"}, -"921":{"name":"糖果零食","parent":"593"}, -"922":{"name":"文化艺术传播","parent":"584"}, -"923":{"name":"太阳能电器","parent":"601"}, -"924":{"name":"药品零售","parent":"645"}, -"925":{"name":"果蔬食品","parent":"593"}, -"926":{"name":"文化活动策划","parent":"585"}, -"928":{"name":"汽车广告","parent":"657"}, -"929":{"name":"条码设备","parent":"630"}, -"930":{"name":"建筑石材","parent":"627"}, -"931":{"name":"贵金属","parent":"545"}, -"932":{"name":"体育","parent":"660"}, -"933":{"name":"金融信息服务","parent":"414"}, -"934":{"name":"玻璃建材","parent":"627"}, -"935":{"name":"家教","parent":"569"}, -"936":{"name":"歌舞厅娱乐活动","parent":"586"}, -"937":{"name":"计算机服务器","parent":"415"}, -"938":{"name":"管道","parent":"627"}, -"939":{"name":"婴幼儿服饰","parent":"597"}, -"940":{"name":"热水器","parent":"601"}, -"941":{"name":"计算机及零部件制造","parent":"415"}, -"942":{"name":"钢铁贸易","parent":"633"}, -"944":{"name":"包装材料","parent":"632"}, -"945":{"name":"计算机办公设备","parent":"603"}, -"946":{"name":"白酒","parent":"594"}, -"948":{"name":"发动机","parent":"620"}, -"949":{"name":"快餐服务","parent":"656"}, -"950":{"name":"酒类销售","parent":"594"}, -"951":{"name":"电子产品、机电设备","parent":"626"}, -"952":{"name":"激光设备","parent":"626"}, -"953":{"name":"餐饮策划","parent":"656"}, -"954":{"name":"饮料、食品","parent":"594"}, -"955":{"name":"文化娱乐经纪","parent":"585"}, -"956":{"name":"天然气","parent":"665"}, -"957":{"name":"农副食品","parent":"593"}, -"958":{"name":"艺术表演","parent":"585"}, -"959":{"name":"石膏、水泥制品及类似制品制造","parent":"627"}, -"960":{"name":"橱柜","parent":"602"}, 
-"961":{"name":"管理培训","parent":"577"}, -"962":{"name":"男装服饰","parent":"597"}, -"963":{"name":"化肥制造","parent":"675"}, -"964":{"name":"童装服饰","parent":"597"}, -"965":{"name":"电源电池","parent":"626"}, -"966":{"name":"家电维修","parent":"664"}, -"967":{"name":"光电子器件","parent":"419"}, -"968":{"name":"旅行社服务","parent":"657"}, -"969":{"name":"电线、电缆制造","parent":"626"}, -"970":{"name":"软件开发、信息系统集成","parent":"419"}, -"971":{"name":"白酒制造","parent":"594"}, -"973":{"name":"甜品服务","parent":"656"}, -"974":{"name":"糕点、面包制造","parent":"593"}, -"975":{"name":"木工机械","parent":"620"}, -"976":{"name":"酒吧服务","parent":"656"}, -"977":{"name":"火腿肠","parent":"593"}, -"978":{"name":"广告策划推广","parent":"579"}, -"979":{"name":"新能源产品和生产装备制造","parent":"667"}, -"980":{"name":"调味品","parent":"593"}, -"981":{"name":"礼仪表演","parent":"585"}, -"982":{"name":"劳务派遣","parent":"560"}, -"983":{"name":"建材零售","parent":"627"}, -"984":{"name":"商品交易中心","parent":"545"}, -"985":{"name":"体育推广","parent":"585"}, -"986":{"name":"茶饮料及其他饮料制造","parent":"594"}, -"987":{"name":"金属建材","parent":"627"}, -"988":{"name":"职业技能培训","parent":"571"}, -"989":{"name":"网吧活动","parent":"586"}, -"990":{"name":"洗衣服务","parent":"658"}, -"991":{"name":"管道工程","parent":"554"}, -"992":{"name":"通信工程","parent":"417"}, -"993":{"name":"电子元器件","parent":"626"}, -"994":{"name":"电子设备","parent":"419"}, -"995":{"name":"茶馆服务","parent":"656"}, -"996":{"name":"旅游开发","parent":"657"}, -"997":{"name":"视频通讯","parent":"417"}, -"998":{"name":"白酒销售","parent":"594"}, -"1000":{"name":"咖啡馆服务","parent":"656"}, -"1001":{"name":"食品零售","parent":"593"}, -"1002":{"name":"健康疗养旅游","parent":"655"}, -"1003":{"name":"粮油食品","parent":"593"}, -"1004":{"name":"儿童教育影视","parent":"583"}, -"1005":{"name":"新能源发电","parent":"667"}, -"1006":{"name":"旅游策划","parent":"657"}, -"1007":{"name":"绘画","parent":"575"}, -"1008":{"name":"方便面及其他方便食品","parent":"593"}, -"1009":{"name":"房地产经纪","parent":"550"}, -"1010":{"name":"母婴家政","parent":"661"}, -"1011":{"name":"居家养老健康服务","parent":"661"}, 
-"1012":{"name":"文化艺术投资","parent":"545"}, -"1013":{"name":"运动健身","parent":"660"}, -"1014":{"name":"瓶(罐)装饮用水制造","parent":"594"}, -"1015":{"name":"金属门窗","parent":"627"}, -"1016":{"name":"机动车检测","parent":"563"}, -"1017":{"name":"货物运输","parent":"634"}, -"1018":{"name":"服饰专卖","parent":"690"}, -"1019":{"name":"酒店服装","parent":"597"}, -"1020":{"name":"通讯软件","parent":"417"}, -"1021":{"name":"消防工程","parent":"554"}, -"1022":{"name":"嵌入式电子系统","parent":"419"}, -"1023":{"name":"航空票务","parent":"636"}, -"1024":{"name":"电气设备","parent":"626"}, -"1025":{"name":"酒业贸易","parent":"594"}, -"1027":{"name":"其他饮料及冷饮服务","parent":"656"}, -"1028":{"name":"乳制品","parent":"593"}, -"1029":{"name":"新闻期刊出版","parent":"588"}, -"1030":{"name":"水污染治理","parent":"672"}, -"1031":{"name":"谷物食品","parent":"593"}, -"1032":{"name":"数字动漫设计制造服务","parent":"590"}, -"1033":{"name":"医院","parent":"646"}, -"1034":{"name":"旅游广告","parent":"657"}, -"1035":{"name":"办公家具","parent":"602"}, -"1036":{"name":"房地产营销策划","parent":"550"}, -"1037":{"name":"保洁家政","parent":"661"}, -"1038":{"name":"水泥制造","parent":"627"}, -"1039":{"name":"市场研究咨询","parent":"567"}, -"1040":{"name":"驾校","parent":"571"}, -"1041":{"name":"正餐服务","parent":"656"}, -"1043":{"name":"机动车燃油","parent":"665"}, -"1044":{"name":"食品","parent":"593"}, -"1045":{"name":"新能源汽车","parent":"629"}, -"1046":{"name":"手机无线网络推广","parent":"417"}, -"1047":{"name":"环保设备","parent":"672"}, -"1048":{"name":"通讯工程","parent":"418"}, -"1049":{"name":"半导体集成电路","parent":"419"}, -"1050":{"name":"航空服务","parent":"636"}, -"1051":{"name":"电机设备","parent":"626"}, -"1052":{"name":"档案软件","parent":"414"}, -"1053":{"name":"冷链物流服务","parent":"634"}, -"1054":{"name":"小吃服务","parent":"656"}, -"1055":{"name":"水产品加工","parent":"593"}, -"1056":{"name":"图书出版","parent":"588"}, -"1057":{"name":"固体废物治理","parent":"672"}, -"1059":{"name":"坚果食品","parent":"593"}, -"1060":{"name":"广告传媒","parent":"579"}, -"1061":{"name":"电梯","parent":"622"}, -"1062":{"name":"社区医疗与卫生院","parent":"646"}, 
-"1063":{"name":"广告、印刷包装","parent":"630"}, -"1064":{"name":"婚纱礼服","parent":"662"}, -"1065":{"name":"地毯","parent":"602"}, -"1066":{"name":"互联网物业","parent":"551"}, -"1067":{"name":"跨境电商","parent":"3"}, -"1068":{"name":"信息安全、系统集成","parent":"9"}, -"1069":{"name":"专用汽车制造","parent":"750"}, -"1070":{"name":"商品贸易","parent":"3"}, -"1071":{"name":"墙壁装饰材料","parent":"746"}, -"1072":{"name":"窗帘装饰材料","parent":"746"}, -"1073":{"name":"电子商务、本地生活服务","parent":"3"}, -"1075":{"name":"白酒电子商务","parent":"3"}, -"1076":{"name":"商品贸易、电子商务","parent":"3"}, -"1077":{"name":"木质装饰材料","parent":"746"}, -"1078":{"name":"电子商务、汽车电商交易平台","parent":"3"}, -"1079":{"name":"汽车轮胎","parent":"751"}, -"1080":{"name":"气体压缩机械制造","parent":"732"}, -"1081":{"name":"家装家具电子商务","parent":"3"}, -"1082":{"name":"化妆品电子商务","parent":"3"}, -"1083":{"name":"汽车销售","parent":"749"}, -"1084":{"name":"新闻资讯网站","parent":"510"}, -"1085":{"name":"母婴电商","parent":"3"}, -"1086":{"name":"电商商务、收藏品交易","parent":"3"}, -"1088":{"name":"电子商务、数码产品","parent":"3"}, -"1089":{"name":"二手车交易","parent":"749"}, -"1090":{"name":"游戏制作服务","parent":"5"}, -"1091":{"name":"母婴服务","parent":"510"}, -"1092":{"name":"家具电子商务","parent":"3"}, -"1093":{"name":"汽车配件电子商务","parent":"3"}, -"1094":{"name":"输配电设备","parent":"777"}, -"1095":{"name":"矿山设备","parent":"727"}, -"1096":{"name":"机床机械","parent":"726"}, -"1097":{"name":"农产品电商","parent":"3"}, -"1098":{"name":"陶瓷装饰材料","parent":"746"}, -"1099":{"name":"车载联网设备","parent":"487"}, -"1100":{"name":"汽车销售电子商务","parent":"3"}, -"1101":{"name":"石油设备","parent":"730"}, -"1102":{"name":"智能家居","parent":"487"}, -"1103":{"name":"散热器","parent":"751"}, -"1104":{"name":"电力工程","parent":"775"}, -"1105":{"name":"生鲜电商","parent":"3"}, -"1106":{"name":"互联网数据服务","parent":"490"}, -"1107":{"name":"房车、商务车销售","parent":"749"}, -"1108":{"name":"茶叶电子商务","parent":"3"}, -"1109":{"name":"酒类电子商务","parent":"3"}, -"1110":{"name":"阀门","parent":"730"}, -"1111":{"name":"食品电商","parent":"3"}, -"1112":{"name":"儿童摄影","parent":"871"}, 
-"1113":{"name":"广告摄影","parent":"871"}, -"1114":{"name":"婚纱摄影","parent":"871"}, -"1115":{"name":"模具制造","parent":"620"}, -"1116":{"name":"汽车模具","parent":"629"}, -"1117":{"name":"认证咨询","parent":"567"}, -"1118":{"name":"数字视觉制作服务","parent":"590"}, -"1119":{"name":"牙科及医疗器械","parent":"646"}, -"1120":{"name":"猎头招聘","parent":"560"}, -"1121":{"name":"家居","parent":"518"}, -"1122":{"name":"收藏品","parent":"518"}, -"1123":{"name":"首饰","parent":"518"}, -"1124":{"name":"工艺品","parent":"518"}, -"1125":{"name":"财务","parent":"515"}, -"1126":{"name":"税务","parent":"515"}, -"1127":{"name":"分类信息","parent":"2"}, -"1128":{"name":"宠物","parent":"0"}, -"1129":{"name":"快消品","parent":"518"}, -"1130":{"name":"人工智能","parent":"2"}, -"1131":{"name":"农/林/牧/渔","parent":"0"} +TBL = { + "1": {"name": "IT/通信/电子", "parent": "0"}, + "2": {"name": "互联网", "parent": "0"}, + "3": {"name": "电子商务", "parent": "2"}, + "4": {"name": "互联网金融", "parent": "2"}, + "5": {"name": "网络游戏", "parent": "2"}, + "6": {"name": "社交网络平台", "parent": "2"}, + "7": {"name": "视频音乐", "parent": "2"}, + "9": {"name": "安全", "parent": "2"}, + "10": {"name": "云计算", "parent": "2"}, + "12": {"name": "工具类客户端应用", "parent": "2"}, + "13": {"name": "互联网广告", "parent": "2"}, + "14": {"name": "企业互联网服务", "parent": "2"}, + "16": {"name": "在线教育", "parent": "2"}, + "17": {"name": "在线医疗", "parent": "2"}, + "19": {"name": "B2B", "parent": "3"}, + "20": {"name": "B2C", "parent": "3"}, + "21": {"name": "C2C", "parent": "3"}, + "22": {"name": "生活信息本地化", "parent": "3"}, + "23": {"name": "在线旅游", "parent": "2"}, + "24": {"name": "第三方支付", "parent": "4"}, + "26": {"name": "客户端游戏", "parent": "5"}, + "27": {"name": "网页游戏", "parent": "5"}, + "28": {"name": "手机游戏", "parent": "5"}, + "29": {"name": "微博", "parent": "6"}, + "30": {"name": "社交网站", "parent": "6"}, + "31": {"name": "在线视频", "parent": "7"}, + "32": {"name": "在线音乐", "parent": "7"}, + "35": {"name": "企业安全", "parent": "9"}, + "36": {"name": "个人安全", "parent": "9"}, + "37": {"name": "企业级云服务", "parent": "10"}, + 
"38": {"name": "个人级云服务", "parent": "10"}, + "43": {"name": "输入法", "parent": "12"}, + "44": {"name": "浏览器", "parent": "12"}, + "45": {"name": "词典", "parent": "12"}, + "46": {"name": "播放器", "parent": "12"}, + "47": {"name": "下载器", "parent": "12"}, + "48": {"name": "IM", "parent": "12"}, + "49": {"name": "广告服务", "parent": "13"}, + "50": {"name": "第三方广告网络平台", "parent": "13"}, + "51": {"name": "媒体代理", "parent": "13"}, + "52": {"name": "创意代理", "parent": "13"}, + "53": {"name": "IT-综合", "parent": "1"}, + "71": {"name": "团购", "parent": "3"}, + "72": {"name": "地图", "parent": "2"}, + "73": {"name": "数据存储", "parent": "2"}, + "414": {"name": "计算机软件", "parent": "1"}, + "415": {"name": "计算机硬件", "parent": "1"}, + "416": {"name": "计算机服务(系统、数据服务、维修)", "parent": "1"}, + "417": {"name": "通信/电信/网络设备", "parent": "1"}, + "418": {"name": "通信/电信运营、增值服务", "parent": "1"}, + "419": {"name": "电子技术/半导体/集成电路", "parent": "1"}, + "472": {"name": "P2P网贷", "parent": "4"}, + "473": {"name": "互联网理财", "parent": "4"}, + "474": {"name": "婚恋", "parent": "6"}, + "476": {"name": "虚拟化", "parent": "10"}, + "477": {"name": "邮箱", "parent": "12"}, + "478": {"name": "商业智能", "parent": "14"}, + "479": {"name": "企业建站", "parent": "14"}, + "480": {"name": "安防", "parent": "14"}, + "481": {"name": "网络营销", "parent": "2"}, + "487": {"name": "智能终端", "parent": "2"}, + "488": {"name": "移动互联网", "parent": "2"}, + "489": {"name": "数字城市", "parent": "2"}, + "490": {"name": "大数据", "parent": "2"}, + "491": {"name": "互联网人力资源", "parent": "2"}, + "492": {"name": "舆情监控", "parent": "2"}, + "493": {"name": "移动营销", "parent": "481"}, + "494": {"name": "微博营销", "parent": "481"}, + "495": {"name": "精准营销", "parent": "481"}, + "496": {"name": "海外营销", "parent": "481"}, + "497": {"name": "微信营销", "parent": "481"}, + "498": {"name": "智能手机", "parent": "487"}, + "499": {"name": "可穿戴设备", "parent": "487"}, + "500": {"name": "智能电视", "parent": "487"}, + "501": {"name": "WAP", "parent": "488"}, + "502": {"name": "物联网", "parent": "489"}, + "503": {"name": 
"O2O", "parent": "489"}, + "504": {"name": "数字出版", "parent": "489"}, + "505": {"name": "搜索", "parent": "2"}, + "506": {"name": "垂直搜索", "parent": "505"}, + "507": {"name": "无线搜索", "parent": "505"}, + "508": {"name": "网页搜索", "parent": "505"}, + "509": {"name": "网址导航", "parent": "2"}, + "510": {"name": "门户", "parent": "2"}, + "511": {"name": "网络文学", "parent": "2"}, + "512": {"name": "自媒体", "parent": "2"}, + "513": {"name": "金融", "parent": "0"}, + "514": {"name": "建筑与房地产", "parent": "0"}, + "515": {"name": "专业服务", "parent": "0"}, + "516": {"name": "教育培训", "parent": "0"}, + "517": {"name": "文化传媒", "parent": "0"}, + "518": {"name": "消费品", "parent": "0"}, + "519": {"name": "工业", "parent": "0"}, + "520": {"name": "交通物流", "parent": "0"}, + "521": {"name": "贸易", "parent": "0"}, + "522": {"name": "医药", "parent": "0"}, + "523": {"name": "医疗器械", "parent": "522"}, + "524": {"name": "保健品", "parent": "518"}, + "525": {"name": "服务业", "parent": "0"}, + "526": {"name": "能源/矿产/环保", "parent": "0"}, + "527": {"name": "化工", "parent": "0"}, + "528": {"name": "政府", "parent": "0"}, + "529": {"name": "公共事业", "parent": "0"}, + "530": {"name": "非盈利机构", "parent": "0"}, + "531": {"name": "农业", "parent": "1131"}, + "532": {"name": "林业", "parent": "1131"}, + "533": {"name": "畜牧业", "parent": "1131"}, + "534": {"name": "渔业", "parent": "1131"}, + "535": {"name": "学术科研", "parent": "0"}, + "536": {"name": "零售", "parent": "0"}, + "537": {"name": "银行", "parent": "513"}, + "538": {"name": "保险", "parent": "513"}, + "539": {"name": "证券", "parent": "513"}, + "540": {"name": "基金", "parent": "513"}, + "541": {"name": "信托", "parent": "513"}, + "542": {"name": "担保", "parent": "513"}, + "543": {"name": "典当", "parent": "513"}, + "544": {"name": "拍卖", "parent": "513"}, + "545": {"name": "投资/融资", "parent": "513"}, + "546": {"name": "期货", "parent": "513"}, + "547": {"name": "房地产开发", "parent": "514"}, + "548": {"name": "工程施工", "parent": "514"}, + "549": {"name": "建筑设计", "parent": "514"}, + "550": {"name": "房地产代理", 
"parent": "514"}, + "551": {"name": "物业管理", "parent": "514"}, + "552": {"name": "室内设计", "parent": "514"}, + "553": {"name": "装修装潢", "parent": "514"}, + "554": {"name": "市政工程", "parent": "514"}, + "555": {"name": "工程造价", "parent": "514"}, + "556": {"name": "工程监理", "parent": "514"}, + "557": {"name": "环境工程", "parent": "514"}, + "558": {"name": "园林景观", "parent": "514"}, + "559": {"name": "法律", "parent": "515"}, + "560": {"name": "人力资源", "parent": "515"}, + "561": {"name": "会计", "parent": "1125"}, + "562": {"name": "审计", "parent": "515"}, + "563": {"name": "检测认证", "parent": "515"}, + "565": {"name": "翻译", "parent": "515"}, + "566": {"name": "中介", "parent": "515"}, + "567": {"name": "咨询", "parent": "515"}, + "568": {"name": "外包服务", "parent": "515"}, + "569": {"name": "家教", "parent": "516"}, + "570": {"name": "早教", "parent": "516"}, + "571": {"name": "职业技能培训", "parent": "516"}, + "572": {"name": "外语培训", "parent": "516"}, + "573": {"name": "设计培训", "parent": "516"}, + "574": {"name": "IT培训", "parent": "516"}, + "575": {"name": "文艺体育培训", "parent": "516"}, + "576": {"name": "学历教育", "parent": "516"}, + "577": {"name": "管理培训", "parent": "516"}, + "578": {"name": "民办基础教育", "parent": "516"}, + "579": {"name": "广告", "parent": "517"}, + "580": {"name": "媒体", "parent": "517"}, + "581": {"name": "会展", "parent": "517"}, + "582": {"name": "公关", "parent": "517"}, + "583": {"name": "影视", "parent": "517"}, + "584": {"name": "艺术", "parent": "517"}, + "585": {"name": "文化传播", "parent": "517"}, + "586": {"name": "娱乐", "parent": "517"}, + "587": {"name": "体育", "parent": "517"}, + "588": {"name": "出版", "parent": "517"}, + "589": {"name": "休闲", "parent": "517"}, + "590": {"name": "动漫", "parent": "517"}, + "591": {"name": "市场推广", "parent": "517"}, + "592": {"name": "市场研究", "parent": "517"}, + "593": {"name": "食品", "parent": "1129"}, + "594": {"name": "饮料", "parent": "1129"}, + "595": {"name": "烟草", "parent": "1129"}, + "596": {"name": "酒品", "parent": "518"}, + "597": {"name": "服饰", "parent": 
"518"}, + "598": {"name": "纺织", "parent": "518"}, + "599": {"name": "化妆品", "parent": "1129"}, + "600": {"name": "日用品", "parent": "1129"}, + "601": {"name": "家电", "parent": "518"}, + "602": {"name": "家具", "parent": "518"}, + "603": {"name": "办公用品", "parent": "518"}, + "604": {"name": "奢侈品", "parent": "518"}, + "605": {"name": "珠宝", "parent": "518"}, + "606": {"name": "数码产品", "parent": "518"}, + "607": {"name": "玩具", "parent": "518"}, + "608": {"name": "图书", "parent": "518"}, + "609": {"name": "音像", "parent": "518"}, + "610": {"name": "钟表", "parent": "518"}, + "611": {"name": "箱包", "parent": "518"}, + "612": {"name": "母婴", "parent": "518"}, + "613": {"name": "营养保健", "parent": "518"}, + "614": {"name": "户外用品", "parent": "518"}, + "615": {"name": "健身器材", "parent": "518"}, + "616": {"name": "乐器", "parent": "518"}, + "617": {"name": "汽车用品", "parent": "518"}, + "619": {"name": "厨具", "parent": "518"}, + "620": {"name": "机械制造", "parent": "519"}, + "621": {"name": "流体控制", "parent": "519"}, + "622": {"name": "自动化控制", "parent": "519"}, + "623": {"name": "仪器仪表", "parent": "519"}, + "624": {"name": "航空/航天", "parent": "519"}, + "625": {"name": "交通设施", "parent": "519"}, + "626": {"name": "工业电子", "parent": "519"}, + "627": {"name": "建材", "parent": "519"}, + "628": {"name": "五金材料", "parent": "519"}, + "629": {"name": "汽车", "parent": "519"}, + "630": {"name": "印刷", "parent": "519"}, + "631": {"name": "造纸", "parent": "519"}, + "632": {"name": "包装", "parent": "519"}, + "633": {"name": "原材料及加工", "parent": "519"}, + "634": {"name": "物流", "parent": "520"}, + "635": {"name": "仓储", "parent": "520"}, + "636": {"name": "客运", "parent": "520"}, + "637": {"name": "快递", "parent": "520"}, + "638": {"name": "化学药", "parent": "522"}, + "639": {"name": "中药", "parent": "522"}, + "640": {"name": "生物制药", "parent": "522"}, + "641": {"name": "兽药", "parent": "522"}, + "642": {"name": "农药", "parent": "522"}, + "643": {"name": "CRO", "parent": "522"}, + "644": {"name": "消毒", "parent": "522"}, + "645": 
{"name": "医药商业", "parent": "522"}, + "646": {"name": "医疗服务", "parent": "522"}, + "647": {"name": "医疗器械", "parent": "523"}, + "648": {"name": "制药设备", "parent": "523"}, + "649": {"name": "医用耗材", "parent": "523"}, + "650": {"name": "手术器械", "parent": "523"}, + "651": {"name": "保健器材", "parent": "524"}, + "652": {"name": "性保健品", "parent": "524"}, + "653": {"name": "医药保养", "parent": "524"}, + "654": {"name": "医用保健", "parent": "524"}, + "655": {"name": "酒店", "parent": "525"}, + "656": {"name": "餐饮", "parent": "525"}, + "657": {"name": "旅游", "parent": "525"}, + "658": {"name": "生活服务", "parent": "525"}, + "659": {"name": "保健服务", "parent": "525"}, + "660": {"name": "运动健身", "parent": "525"}, + "661": {"name": "家政服务", "parent": "525"}, + "662": {"name": "婚庆服务", "parent": "525"}, + "663": {"name": "租赁服务", "parent": "525"}, + "664": {"name": "维修服务", "parent": "525"}, + "665": {"name": "石油天然气", "parent": "526"}, + "666": {"name": "电力", "parent": "526"}, + "667": {"name": "新能源", "parent": "526"}, + "668": {"name": "水利", "parent": "526"}, + "669": {"name": "矿产", "parent": "526"}, + "670": {"name": "采掘业", "parent": "526"}, + "671": {"name": "冶炼", "parent": "526"}, + "672": {"name": "环保", "parent": "526"}, + "673": {"name": "无机化工原料", "parent": "527"}, + "674": {"name": "有机化工原料", "parent": "527"}, + "675": {"name": "精细化学品", "parent": "527"}, + "676": {"name": "化工设备", "parent": "527"}, + "677": {"name": "化工工程", "parent": "527"}, + "678": {"name": "资产管理", "parent": "513"}, + "679": {"name": "金融租赁", "parent": "513"}, + "680": {"name": "征信及信评机构", "parent": "513"}, + "681": {"name": "资产评估机构", "parent": "513"}, + "683": {"name": "金融监管机构", "parent": "513"}, + "684": {"name": "国际贸易", "parent": "521"}, + "685": {"name": "海关", "parent": "521"}, + "686": {"name": "购物中心", "parent": "536"}, + "687": {"name": "超市", "parent": "536"}, + "688": {"name": "便利店", "parent": "536"}, + "689": {"name": "专卖店", "parent": "536"}, + "690": {"name": "专业店", "parent": "536"}, + "691": {"name": "百货店", "parent": 
"536"}, + "692": {"name": "杂货店", "parent": "536"}, + "693": {"name": "个人银行", "parent": "537"}, + "695": {"name": "私人银行", "parent": "537"}, + "696": {"name": "公司银行", "parent": "537"}, + "697": {"name": "投资银行", "parent": "537"}, + "698": {"name": "政策性银行", "parent": "537"}, + "699": {"name": "中央银行", "parent": "537"}, + "700": {"name": "人寿险", "parent": "538"}, + "701": {"name": "财产险", "parent": "538"}, + "702": {"name": "再保险", "parent": "538"}, + "703": {"name": "养老险", "parent": "538"}, + "704": {"name": "保险代理公司", "parent": "538"}, + "705": {"name": "公募基金", "parent": "540"}, + "707": {"name": "私募基金", "parent": "540"}, + "708": {"name": "第三方理财", "parent": "679"}, + "709": {"name": "资产管理公司", "parent": "679"}, + "711": {"name": "房产中介", "parent": "566"}, + "712": {"name": "职业中介", "parent": "566"}, + "713": {"name": "婚姻中介", "parent": "566"}, + "714": {"name": "战略咨询", "parent": "567"}, + "715": {"name": "投资咨询", "parent": "567"}, + "716": {"name": "心理咨询", "parent": "567"}, + "717": {"name": "留学移民咨询", "parent": "567"}, + "718": {"name": "工商注册代理", "parent": "568"}, + "719": {"name": "商标专利代理", "parent": "568"}, + "720": {"name": "财务代理", "parent": "568"}, + "721": {"name": "工程机械", "parent": "620"}, + "722": {"name": "农业机械", "parent": "620"}, + "723": {"name": "海工设备", "parent": "620"}, + "724": {"name": "包装机械", "parent": "620"}, + "725": {"name": "印刷机械", "parent": "620"}, + "726": {"name": "数控机床", "parent": "620"}, + "727": {"name": "矿山机械", "parent": "620"}, + "728": {"name": "水泵", "parent": "621"}, + "729": {"name": "管道", "parent": "621"}, + "730": {"name": "阀门", "parent": "621"}, + "732": {"name": "压缩机", "parent": "621"}, + "733": {"name": "集散控制系统", "parent": "622"}, + "734": {"name": "远程控制", "parent": "622"}, + "735": {"name": "液压系统", "parent": "622"}, + "736": {"name": "楼宇智能化", "parent": "622"}, + "737": {"name": "飞机制造", "parent": "624"}, + "738": {"name": "航空公司", "parent": "624"}, + "739": {"name": "发动机", "parent": "624"}, + "740": {"name": "复合材料", "parent": "624"}, + "741": 
{"name": "高铁", "parent": "625"}, + "742": {"name": "地铁", "parent": "625"}, + "743": {"name": "信号传输", "parent": "625"}, + "745": {"name": "结构材料", "parent": "627"}, + "746": {"name": "装饰材料", "parent": "627"}, + "747": {"name": "专用材料", "parent": "627"}, + "749": {"name": "经销商集团", "parent": "629"}, + "750": {"name": "整车制造", "parent": "629"}, + "751": {"name": "汽车零配件", "parent": "629"}, + "752": {"name": "外型设计", "parent": "629"}, + "753": {"name": "平版印刷", "parent": "630"}, + "754": {"name": "凸版印刷", "parent": "630"}, + "755": {"name": "凹版印刷", "parent": "630"}, + "756": {"name": "孔版印刷", "parent": "630"}, + "757": {"name": "印刷用纸", "parent": "631"}, + "758": {"name": "书写、制图及复制用纸", "parent": "631"}, + "759": {"name": "包装用纸", "parent": "631"}, + "760": {"name": "生活、卫生及装饰用纸", "parent": "631"}, + "761": {"name": "技术用纸", "parent": "631"}, + "762": {"name": "加工纸原纸", "parent": "631"}, + "763": {"name": "食品包装", "parent": "632"}, + "764": {"name": "医药包装", "parent": "632"}, + "765": {"name": "日化包装", "parent": "632"}, + "766": {"name": "物流包装", "parent": "632"}, + "767": {"name": "礼品包装", "parent": "632"}, + "768": {"name": "电子五金包装", "parent": "632"}, + "769": {"name": "汽车服务", "parent": "525"}, + "770": {"name": "汽车保养", "parent": "769"}, + "771": {"name": "租车", "parent": "769"}, + "773": {"name": "出租车", "parent": "769"}, + "774": {"name": "代驾", "parent": "769"}, + "775": {"name": "发电", "parent": "666"}, + "777": {"name": "输配电", "parent": "666"}, + "779": {"name": "风电", "parent": "667"}, + "780": {"name": "光伏/太阳能", "parent": "667"}, + "781": {"name": "生物质发电", "parent": "667"}, + "782": {"name": "煤化工", "parent": "667"}, + "783": {"name": "垃圾发电", "parent": "667"}, + "784": {"name": "核电", "parent": "667"}, + "785": {"name": "能源矿产", "parent": "669"}, + "786": {"name": "金属矿产", "parent": "669"}, + "787": {"name": "非金属矿产", "parent": "669"}, + "788": {"name": "水气矿产", "parent": "669"}, + "789": {"name": "锅炉", "parent": "775"}, + "790": {"name": "发电机", "parent": "775"}, + "791": {"name": "汽轮机", 
"parent": "775"}, + "792": {"name": "燃机", "parent": "775"}, + "793": {"name": "冷却", "parent": "775"}, + "794": {"name": "电力设计院", "parent": "775"}, + "795": {"name": "高压输配电", "parent": "777"}, + "796": {"name": "中压输配电", "parent": "777"}, + "797": {"name": "低压输配电", "parent": "777"}, + "798": {"name": "继电保护", "parent": "777"}, + "799": {"name": "智能电网", "parent": "777"}, + "800": {"name": "小学", "parent": "516"}, + "801": {"name": "电动车", "parent": "519"}, + "802": {"name": "皮具箱包", "parent": "518"}, + "803": {"name": "医药制造", "parent": "522"}, + "804": {"name": "电器销售", "parent": "536"}, + "805": {"name": "塑料制品", "parent": "527"}, + "806": {"name": "公益基金会", "parent": "530"}, + "807": {"name": "美发服务", "parent": "525"}, + "808": {"name": "农业养殖", "parent": "531"}, + "809": {"name": "金融服务", "parent": "513"}, + "810": {"name": "商业地产综合体", "parent": "514"}, + "811": {"name": "美容服务", "parent": "525"}, + "812": {"name": "灯饰", "parent": "518"}, + "813": {"name": "油墨颜料产品", "parent": "527"}, + "814": {"name": "眼镜制造", "parent": "518"}, + "815": {"name": "农业生物技术", "parent": "531"}, + "816": {"name": "体育用品", "parent": "518"}, + "817": {"name": "保健用品", "parent": "524"}, + "818": {"name": "化学化工产品", "parent": "527"}, + "819": {"name": "饲料", "parent": "531"}, + "821": {"name": "保安服务", "parent": "525"}, + "822": {"name": "干细胞技术", "parent": "522"}, + "824": {"name": "农药化肥", "parent": "527"}, + "825": {"name": "卫生洁具", "parent": "518"}, + "826": {"name": "体育器材、场馆", "parent": "518"}, + "827": {"name": "饲料加工", "parent": "531"}, + "828": {"name": "测绘服务", "parent": "529"}, + "830": {"name": "金属船舶制造", "parent": "519"}, + "831": {"name": "基因工程", "parent": "522"}, + "832": {"name": "花卉服务", "parent": "536"}, + "833": {"name": "农业种植", "parent": "531"}, + "834": {"name": "皮革制品", "parent": "518"}, + "835": {"name": "地理信息加工服务", "parent": "529"}, + "836": {"name": "机器人", "parent": "519"}, + "837": {"name": "礼品", "parent": "518"}, + "838": {"name": "理发及美容服务", "parent": "525"}, + "839": {"name": "其他清洁服务", 
"parent": "525"}, + "840": {"name": "硅胶材料", "parent": "527"}, + "841": {"name": "茶叶销售", "parent": "518"}, + "842": {"name": "彩票活动", "parent": "529"}, + "843": {"name": "化妆培训", "parent": "516"}, + "844": {"name": "鞋业", "parent": "518"}, + "845": {"name": "酒店用品", "parent": "518"}, + "846": {"name": "复合材料", "parent": "527"}, + "847": {"name": "房地产工程建设", "parent": "548"}, + "848": {"name": "知识产权服务", "parent": "559"}, + "849": {"name": "新型建材", "parent": "627"}, + "850": {"name": "企业投资咨询", "parent": "567"}, + "851": {"name": "含乳饮料和植物蛋白饮料制造", "parent": "594"}, + "852": {"name": "汽车检测设备", "parent": "629"}, + "853": {"name": "手机通讯器材", "parent": "417"}, + "854": {"name": "环保材料", "parent": "672"}, + "855": {"name": "交通设施", "parent": "554"}, + "856": {"name": "电子器件", "parent": "419"}, + "857": {"name": "啤酒", "parent": "594"}, + "858": {"name": "生态旅游", "parent": "657"}, + "859": {"name": "自动化设备", "parent": "626"}, + "860": {"name": "软件开发", "parent": "414"}, + "861": {"name": "葡萄酒销售", "parent": "594"}, + "862": {"name": "钢材", "parent": "633"}, + "863": {"name": "餐饮培训", "parent": "656"}, + "864": {"name": "速冻食品", "parent": "593"}, + "865": {"name": "空气环保", "parent": "672"}, + "866": {"name": "互联网房地产经纪服务", "parent": "550"}, + "867": {"name": "食品添加剂", "parent": "593"}, + "868": {"name": "演艺传播", "parent": "585"}, + "869": {"name": "信用卡", "parent": "537"}, + "870": {"name": "报纸期刊广告", "parent": "579"}, + "871": {"name": "摄影", "parent": "525"}, + "872": {"name": "手机软件", "parent": "414"}, + "873": {"name": "地坪建材", "parent": "627"}, + "874": {"name": "企业管理咨询", "parent": "567"}, + "875": {"name": "幼儿教育", "parent": "570"}, + "876": {"name": "系统集成", "parent": "416"}, + "877": {"name": "皮革服饰", "parent": "597"}, + "878": {"name": "保健食品", "parent": "593"}, + "879": {"name": "叉车", "parent": "620"}, + "880": {"name": "厨卫电器", "parent": "601"}, + "882": {"name": "地暖设备", "parent": "627"}, + "883": {"name": "钢结构制造", "parent": "548"}, + "884": {"name": "投影机", "parent": "606"}, + "885": {"name": 
"啤酒销售", "parent": "594"}, + "886": {"name": "度假村旅游", "parent": "657"}, + "887": {"name": "电力元件设备", "parent": "626"}, + "888": {"name": "管理软件", "parent": "414"}, + "889": {"name": "轴承", "parent": "628"}, + "890": {"name": "餐饮设备", "parent": "656"}, + "891": {"name": "肉制品及副产品加工", "parent": "593"}, + "892": {"name": "艺术收藏品投资交易", "parent": "584"}, + "893": {"name": "净水器", "parent": "601"}, + "894": {"name": "进口食品", "parent": "593"}, + "895": {"name": "娱乐文化传播", "parent": "585"}, + "896": {"name": "文化传播", "parent": "585"}, + "897": {"name": "商旅传媒", "parent": "580"}, + "898": {"name": "广告设计制作", "parent": "579"}, + "899": {"name": "金属丝绳及其制品制造", "parent": "627"}, + "900": {"name": "建筑涂料", "parent": "627"}, + "901": {"name": "抵押贷款", "parent": "543"}, + "902": {"name": "早教", "parent": "570"}, + "903": {"name": "电影放映", "parent": "583"}, + "904": {"name": "内衣服饰", "parent": "597"}, + "905": {"name": "无线网络通信", "parent": "418"}, + "906": {"name": "记忆卡", "parent": "415"}, + "907": {"name": "女装服饰", "parent": "597"}, + "908": {"name": "建筑机械", "parent": "620"}, + "909": {"name": "制冷电器", "parent": "601"}, + "910": {"name": "通信设备", "parent": "417"}, + "911": {"name": "空调设备", "parent": "601"}, + "912": {"name": "建筑装饰", "parent": "553"}, + "913": {"name": "办公设备", "parent": "603"}, + "916": {"name": "数据处理软件", "parent": "414"}, + "917": {"name": "葡萄酒贸易", "parent": "594"}, + "918": {"name": "通讯器材", "parent": "417"}, + "919": {"name": "铜业", "parent": "633"}, + "920": {"name": "食堂", "parent": "656"}, + "921": {"name": "糖果零食", "parent": "593"}, + "922": {"name": "文化艺术传播", "parent": "584"}, + "923": {"name": "太阳能电器", "parent": "601"}, + "924": {"name": "药品零售", "parent": "645"}, + "925": {"name": "果蔬食品", "parent": "593"}, + "926": {"name": "文化活动策划", "parent": "585"}, + "928": {"name": "汽车广告", "parent": "657"}, + "929": {"name": "条码设备", "parent": "630"}, + "930": {"name": "建筑石材", "parent": "627"}, + "931": {"name": "贵金属", "parent": "545"}, + "932": {"name": "体育", "parent": "660"}, + "933": {"name": 
"金融信息服务", "parent": "414"}, + "934": {"name": "玻璃建材", "parent": "627"}, + "935": {"name": "家教", "parent": "569"}, + "936": {"name": "歌舞厅娱乐活动", "parent": "586"}, + "937": {"name": "计算机服务器", "parent": "415"}, + "938": {"name": "管道", "parent": "627"}, + "939": {"name": "婴幼儿服饰", "parent": "597"}, + "940": {"name": "热水器", "parent": "601"}, + "941": {"name": "计算机及零部件制造", "parent": "415"}, + "942": {"name": "钢铁贸易", "parent": "633"}, + "944": {"name": "包装材料", "parent": "632"}, + "945": {"name": "计算机办公设备", "parent": "603"}, + "946": {"name": "白酒", "parent": "594"}, + "948": {"name": "发动机", "parent": "620"}, + "949": {"name": "快餐服务", "parent": "656"}, + "950": {"name": "酒类销售", "parent": "594"}, + "951": {"name": "电子产品、机电设备", "parent": "626"}, + "952": {"name": "激光设备", "parent": "626"}, + "953": {"name": "餐饮策划", "parent": "656"}, + "954": {"name": "饮料、食品", "parent": "594"}, + "955": {"name": "文化娱乐经纪", "parent": "585"}, + "956": {"name": "天然气", "parent": "665"}, + "957": {"name": "农副食品", "parent": "593"}, + "958": {"name": "艺术表演", "parent": "585"}, + "959": {"name": "石膏、水泥制品及类似制品制造", "parent": "627"}, + "960": {"name": "橱柜", "parent": "602"}, + "961": {"name": "管理培训", "parent": "577"}, + "962": {"name": "男装服饰", "parent": "597"}, + "963": {"name": "化肥制造", "parent": "675"}, + "964": {"name": "童装服饰", "parent": "597"}, + "965": {"name": "电源电池", "parent": "626"}, + "966": {"name": "家电维修", "parent": "664"}, + "967": {"name": "光电子器件", "parent": "419"}, + "968": {"name": "旅行社服务", "parent": "657"}, + "969": {"name": "电线、电缆制造", "parent": "626"}, + "970": {"name": "软件开发、信息系统集成", "parent": "419"}, + "971": {"name": "白酒制造", "parent": "594"}, + "973": {"name": "甜品服务", "parent": "656"}, + "974": {"name": "糕点、面包制造", "parent": "593"}, + "975": {"name": "木工机械", "parent": "620"}, + "976": {"name": "酒吧服务", "parent": "656"}, + "977": {"name": "火腿肠", "parent": "593"}, + "978": {"name": "广告策划推广", "parent": "579"}, + "979": {"name": "新能源产品和生产装备制造", "parent": "667"}, + "980": {"name": "调味品", "parent": 
"593"}, + "981": {"name": "礼仪表演", "parent": "585"}, + "982": {"name": "劳务派遣", "parent": "560"}, + "983": {"name": "建材零售", "parent": "627"}, + "984": {"name": "商品交易中心", "parent": "545"}, + "985": {"name": "体育推广", "parent": "585"}, + "986": {"name": "茶饮料及其他饮料制造", "parent": "594"}, + "987": {"name": "金属建材", "parent": "627"}, + "988": {"name": "职业技能培训", "parent": "571"}, + "989": {"name": "网吧活动", "parent": "586"}, + "990": {"name": "洗衣服务", "parent": "658"}, + "991": {"name": "管道工程", "parent": "554"}, + "992": {"name": "通信工程", "parent": "417"}, + "993": {"name": "电子元器件", "parent": "626"}, + "994": {"name": "电子设备", "parent": "419"}, + "995": {"name": "茶馆服务", "parent": "656"}, + "996": {"name": "旅游开发", "parent": "657"}, + "997": {"name": "视频通讯", "parent": "417"}, + "998": {"name": "白酒销售", "parent": "594"}, + "1000": {"name": "咖啡馆服务", "parent": "656"}, + "1001": {"name": "食品零售", "parent": "593"}, + "1002": {"name": "健康疗养旅游", "parent": "655"}, + "1003": {"name": "粮油食品", "parent": "593"}, + "1004": {"name": "儿童教育影视", "parent": "583"}, + "1005": {"name": "新能源发电", "parent": "667"}, + "1006": {"name": "旅游策划", "parent": "657"}, + "1007": {"name": "绘画", "parent": "575"}, + "1008": {"name": "方便面及其他方便食品", "parent": "593"}, + "1009": {"name": "房地产经纪", "parent": "550"}, + "1010": {"name": "母婴家政", "parent": "661"}, + "1011": {"name": "居家养老健康服务", "parent": "661"}, + "1012": {"name": "文化艺术投资", "parent": "545"}, + "1013": {"name": "运动健身", "parent": "660"}, + "1014": {"name": "瓶(罐)装饮用水制造", "parent": "594"}, + "1015": {"name": "金属门窗", "parent": "627"}, + "1016": {"name": "机动车检测", "parent": "563"}, + "1017": {"name": "货物运输", "parent": "634"}, + "1018": {"name": "服饰专卖", "parent": "690"}, + "1019": {"name": "酒店服装", "parent": "597"}, + "1020": {"name": "通讯软件", "parent": "417"}, + "1021": {"name": "消防工程", "parent": "554"}, + "1022": {"name": "嵌入式电子系统", "parent": "419"}, + "1023": {"name": "航空票务", "parent": "636"}, + "1024": {"name": "电气设备", "parent": "626"}, + "1025": {"name": "酒业贸易", "parent": 
"594"}, + "1027": {"name": "其他饮料及冷饮服务", "parent": "656"}, + "1028": {"name": "乳制品", "parent": "593"}, + "1029": {"name": "新闻期刊出版", "parent": "588"}, + "1030": {"name": "水污染治理", "parent": "672"}, + "1031": {"name": "谷物食品", "parent": "593"}, + "1032": {"name": "数字动漫设计制造服务", "parent": "590"}, + "1033": {"name": "医院", "parent": "646"}, + "1034": {"name": "旅游广告", "parent": "657"}, + "1035": {"name": "办公家具", "parent": "602"}, + "1036": {"name": "房地产营销策划", "parent": "550"}, + "1037": {"name": "保洁家政", "parent": "661"}, + "1038": {"name": "水泥制造", "parent": "627"}, + "1039": {"name": "市场研究咨询", "parent": "567"}, + "1040": {"name": "驾校", "parent": "571"}, + "1041": {"name": "正餐服务", "parent": "656"}, + "1043": {"name": "机动车燃油", "parent": "665"}, + "1044": {"name": "食品", "parent": "593"}, + "1045": {"name": "新能源汽车", "parent": "629"}, + "1046": {"name": "手机无线网络推广", "parent": "417"}, + "1047": {"name": "环保设备", "parent": "672"}, + "1048": {"name": "通讯工程", "parent": "418"}, + "1049": {"name": "半导体集成电路", "parent": "419"}, + "1050": {"name": "航空服务", "parent": "636"}, + "1051": {"name": "电机设备", "parent": "626"}, + "1052": {"name": "档案软件", "parent": "414"}, + "1053": {"name": "冷链物流服务", "parent": "634"}, + "1054": {"name": "小吃服务", "parent": "656"}, + "1055": {"name": "水产品加工", "parent": "593"}, + "1056": {"name": "图书出版", "parent": "588"}, + "1057": {"name": "固体废物治理", "parent": "672"}, + "1059": {"name": "坚果食品", "parent": "593"}, + "1060": {"name": "广告传媒", "parent": "579"}, + "1061": {"name": "电梯", "parent": "622"}, + "1062": {"name": "社区医疗与卫生院", "parent": "646"}, + "1063": {"name": "广告、印刷包装", "parent": "630"}, + "1064": {"name": "婚纱礼服", "parent": "662"}, + "1065": {"name": "地毯", "parent": "602"}, + "1066": {"name": "互联网物业", "parent": "551"}, + "1067": {"name": "跨境电商", "parent": "3"}, + "1068": {"name": "信息安全、系统集成", "parent": "9"}, + "1069": {"name": "专用汽车制造", "parent": "750"}, + "1070": {"name": "商品贸易", "parent": "3"}, + "1071": {"name": "墙壁装饰材料", "parent": "746"}, + "1072": {"name": 
"窗帘装饰材料", "parent": "746"}, + "1073": {"name": "电子商务、本地生活服务", "parent": "3"}, + "1075": {"name": "白酒电子商务", "parent": "3"}, + "1076": {"name": "商品贸易、电子商务", "parent": "3"}, + "1077": {"name": "木质装饰材料", "parent": "746"}, + "1078": {"name": "电子商务、汽车电商交易平台", "parent": "3"}, + "1079": {"name": "汽车轮胎", "parent": "751"}, + "1080": {"name": "气体压缩机械制造", "parent": "732"}, + "1081": {"name": "家装家具电子商务", "parent": "3"}, + "1082": {"name": "化妆品电子商务", "parent": "3"}, + "1083": {"name": "汽车销售", "parent": "749"}, + "1084": {"name": "新闻资讯网站", "parent": "510"}, + "1085": {"name": "母婴电商", "parent": "3"}, + "1086": {"name": "电商商务、收藏品交易", "parent": "3"}, + "1088": {"name": "电子商务、数码产品", "parent": "3"}, + "1089": {"name": "二手车交易", "parent": "749"}, + "1090": {"name": "游戏制作服务", "parent": "5"}, + "1091": {"name": "母婴服务", "parent": "510"}, + "1092": {"name": "家具电子商务", "parent": "3"}, + "1093": {"name": "汽车配件电子商务", "parent": "3"}, + "1094": {"name": "输配电设备", "parent": "777"}, + "1095": {"name": "矿山设备", "parent": "727"}, + "1096": {"name": "机床机械", "parent": "726"}, + "1097": {"name": "农产品电商", "parent": "3"}, + "1098": {"name": "陶瓷装饰材料", "parent": "746"}, + "1099": {"name": "车载联网设备", "parent": "487"}, + "1100": {"name": "汽车销售电子商务", "parent": "3"}, + "1101": {"name": "石油设备", "parent": "730"}, + "1102": {"name": "智能家居", "parent": "487"}, + "1103": {"name": "散热器", "parent": "751"}, + "1104": {"name": "电力工程", "parent": "775"}, + "1105": {"name": "生鲜电商", "parent": "3"}, + "1106": {"name": "互联网数据服务", "parent": "490"}, + "1107": {"name": "房车、商务车销售", "parent": "749"}, + "1108": {"name": "茶叶电子商务", "parent": "3"}, + "1109": {"name": "酒类电子商务", "parent": "3"}, + "1110": {"name": "阀门", "parent": "730"}, + "1111": {"name": "食品电商", "parent": "3"}, + "1112": {"name": "儿童摄影", "parent": "871"}, + "1113": {"name": "广告摄影", "parent": "871"}, + "1114": {"name": "婚纱摄影", "parent": "871"}, + "1115": {"name": "模具制造", "parent": "620"}, + "1116": {"name": "汽车模具", "parent": "629"}, + "1117": {"name": "认证咨询", "parent": 
"567"}, + "1118": {"name": "数字视觉制作服务", "parent": "590"}, + "1119": {"name": "牙科及医疗器械", "parent": "646"}, + "1120": {"name": "猎头招聘", "parent": "560"}, + "1121": {"name": "家居", "parent": "518"}, + "1122": {"name": "收藏品", "parent": "518"}, + "1123": {"name": "首饰", "parent": "518"}, + "1124": {"name": "工艺品", "parent": "518"}, + "1125": {"name": "财务", "parent": "515"}, + "1126": {"name": "税务", "parent": "515"}, + "1127": {"name": "分类信息", "parent": "2"}, + "1128": {"name": "宠物", "parent": "0"}, + "1129": {"name": "快消品", "parent": "518"}, + "1130": {"name": "人工智能", "parent": "2"}, + "1131": {"name": "农/林/牧/渔", "parent": "0"}, } + def get_names(id): id = str(id) nms = [] d = TBL.get(id) - if not d:return [] + if not d: + return [] nms.append(d["name"]) p = get_names(d["parent"]) - if p: nms.extend(p) + if p: + nms.extend(p) return nms + if __name__ == "__main__": print(get_names("1119")) diff --git a/deepdoc/parser/resume/entities/regions.py b/deepdoc/parser/resume/entities/regions.py index e1707530b48f75f8bf7bb17565eb417748da1878..4c3f0d722fc1277aca1fcd6fe5842019a6a5dbf2 100644 --- a/deepdoc/parser/resume/entities/regions.py +++ b/deepdoc/parser/resume/entities/regions.py @@ -10,766 +10,776 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import re TBL = { -"2":{"name":"北京","parent":"1"}, -"3":{"name":"天津","parent":"1"}, -"4":{"name":"河北","parent":"1"}, -"5":{"name":"山西","parent":"1"}, -"6":{"name":"内蒙古","parent":"1"}, -"7":{"name":"辽宁","parent":"1"}, -"8":{"name":"吉林","parent":"1"}, -"9":{"name":"黑龙江","parent":"1"}, -"10":{"name":"上海","parent":"1"}, -"11":{"name":"江苏","parent":"1"}, -"12":{"name":"浙江","parent":"1"}, -"13":{"name":"安徽","parent":"1"}, -"14":{"name":"福建","parent":"1"}, -"15":{"name":"江西","parent":"1"}, -"16":{"name":"山东","parent":"1"}, -"17":{"name":"河南","parent":"1"}, -"18":{"name":"湖北","parent":"1"}, -"19":{"name":"湖南","parent":"1"}, -"20":{"name":"广东","parent":"1"}, -"21":{"name":"广西","parent":"1"}, -"22":{"name":"海南","parent":"1"}, -"23":{"name":"重庆","parent":"1"}, -"24":{"name":"四川","parent":"1"}, -"25":{"name":"贵州","parent":"1"}, -"26":{"name":"云南","parent":"1"}, -"27":{"name":"西藏","parent":"1"}, -"28":{"name":"陕西","parent":"1"}, -"29":{"name":"甘肃","parent":"1"}, -"30":{"name":"青海","parent":"1"}, -"31":{"name":"宁夏","parent":"1"}, -"32":{"name":"新疆","parent":"1"}, -"33":{"name":"北京市","parent":"2"}, -"34":{"name":"天津市","parent":"3"}, -"35":{"name":"石家庄市","parent":"4"}, -"36":{"name":"唐山市","parent":"4"}, -"37":{"name":"秦皇岛市","parent":"4"}, -"38":{"name":"邯郸市","parent":"4"}, -"39":{"name":"邢台市","parent":"4"}, -"40":{"name":"保定市","parent":"4"}, -"41":{"name":"张家口市","parent":"4"}, -"42":{"name":"承德市","parent":"4"}, -"43":{"name":"沧州市","parent":"4"}, -"44":{"name":"廊坊市","parent":"4"}, -"45":{"name":"衡水市","parent":"4"}, -"46":{"name":"太原市","parent":"5"}, -"47":{"name":"大同市","parent":"5"}, -"48":{"name":"阳泉市","parent":"5"}, -"49":{"name":"长治市","parent":"5"}, -"50":{"name":"晋城市","parent":"5"}, -"51":{"name":"朔州市","parent":"5"}, -"52":{"name":"晋中市","parent":"5"}, -"53":{"name":"运城市","parent":"5"}, -"54":{"name":"忻州市","parent":"5"}, -"55":{"name":"临汾市","parent":"5"}, -"56":{"name":"吕梁市","parent":"5"}, -"57":{"name":"呼和浩特市","parent":"6"}, -"58":{"name":"包头市","parent":"6"}, 
-"59":{"name":"乌海市","parent":"6"}, -"60":{"name":"赤峰市","parent":"6"}, -"61":{"name":"通辽市","parent":"6"}, -"62":{"name":"鄂尔多斯市","parent":"6"}, -"63":{"name":"呼伦贝尔市","parent":"6"}, -"64":{"name":"巴彦淖尔市","parent":"6"}, -"65":{"name":"乌兰察布市","parent":"6"}, -"66":{"name":"兴安盟","parent":"6"}, -"67":{"name":"锡林郭勒盟","parent":"6"}, -"68":{"name":"阿拉善盟","parent":"6"}, -"69":{"name":"沈阳市","parent":"7"}, -"70":{"name":"大连市","parent":"7"}, -"71":{"name":"鞍山市","parent":"7"}, -"72":{"name":"抚顺市","parent":"7"}, -"73":{"name":"本溪市","parent":"7"}, -"74":{"name":"丹东市","parent":"7"}, -"75":{"name":"锦州市","parent":"7"}, -"76":{"name":"营口市","parent":"7"}, -"77":{"name":"阜新市","parent":"7"}, -"78":{"name":"辽阳市","parent":"7"}, -"79":{"name":"盘锦市","parent":"7"}, -"80":{"name":"铁岭市","parent":"7"}, -"81":{"name":"朝阳市","parent":"7"}, -"82":{"name":"葫芦岛市","parent":"7"}, -"83":{"name":"长春市","parent":"8"}, -"84":{"name":"吉林市","parent":"8"}, -"85":{"name":"四平市","parent":"8"}, -"86":{"name":"辽源市","parent":"8"}, -"87":{"name":"通化市","parent":"8"}, -"88":{"name":"白山市","parent":"8"}, -"89":{"name":"松原市","parent":"8"}, -"90":{"name":"白城市","parent":"8"}, -"91":{"name":"延边朝鲜族自治州","parent":"8"}, -"92":{"name":"哈尔滨市","parent":"9"}, -"93":{"name":"齐齐哈尔市","parent":"9"}, -"94":{"name":"鸡西市","parent":"9"}, -"95":{"name":"鹤岗市","parent":"9"}, -"96":{"name":"双鸭山市","parent":"9"}, -"97":{"name":"大庆市","parent":"9"}, -"98":{"name":"伊春市","parent":"9"}, -"99":{"name":"佳木斯市","parent":"9"}, -"100":{"name":"七台河市","parent":"9"}, -"101":{"name":"牡丹江市","parent":"9"}, -"102":{"name":"黑河市","parent":"9"}, -"103":{"name":"绥化市","parent":"9"}, -"104":{"name":"大兴安岭地区","parent":"9"}, -"105":{"name":"上海市","parent":"10"}, -"106":{"name":"南京市","parent":"11"}, -"107":{"name":"无锡市","parent":"11"}, -"108":{"name":"徐州市","parent":"11"}, -"109":{"name":"常州市","parent":"11"}, -"110":{"name":"苏州市","parent":"11"}, -"111":{"name":"南通市","parent":"11"}, -"112":{"name":"连云港市","parent":"11"}, -"113":{"name":"淮安市","parent":"11"}, 
-"114":{"name":"盐城市","parent":"11"}, -"115":{"name":"扬州市","parent":"11"}, -"116":{"name":"镇江市","parent":"11"}, -"117":{"name":"泰州市","parent":"11"}, -"118":{"name":"宿迁市","parent":"11"}, -"119":{"name":"杭州市","parent":"12"}, -"120":{"name":"宁波市","parent":"12"}, -"121":{"name":"温州市","parent":"12"}, -"122":{"name":"嘉兴市","parent":"12"}, -"123":{"name":"湖州市","parent":"12"}, -"124":{"name":"绍兴市","parent":"12"}, -"125":{"name":"金华市","parent":"12"}, -"126":{"name":"衢州市","parent":"12"}, -"127":{"name":"舟山市","parent":"12"}, -"128":{"name":"台州市","parent":"12"}, -"129":{"name":"丽水市","parent":"12"}, -"130":{"name":"合肥市","parent":"13"}, -"131":{"name":"芜湖市","parent":"13"}, -"132":{"name":"蚌埠市","parent":"13"}, -"133":{"name":"淮南市","parent":"13"}, -"134":{"name":"马鞍山市","parent":"13"}, -"135":{"name":"淮北市","parent":"13"}, -"136":{"name":"铜陵市","parent":"13"}, -"137":{"name":"安庆市","parent":"13"}, -"138":{"name":"黄山市","parent":"13"}, -"139":{"name":"滁州市","parent":"13"}, -"140":{"name":"阜阳市","parent":"13"}, -"141":{"name":"宿州市","parent":"13"}, -"143":{"name":"六安市","parent":"13"}, -"144":{"name":"亳州市","parent":"13"}, -"145":{"name":"池州市","parent":"13"}, -"146":{"name":"宣城市","parent":"13"}, -"147":{"name":"福州市","parent":"14"}, -"148":{"name":"厦门市","parent":"14"}, -"149":{"name":"莆田市","parent":"14"}, -"150":{"name":"三明市","parent":"14"}, -"151":{"name":"泉州市","parent":"14"}, -"152":{"name":"漳州市","parent":"14"}, -"153":{"name":"南平市","parent":"14"}, -"154":{"name":"龙岩市","parent":"14"}, -"155":{"name":"宁德市","parent":"14"}, -"156":{"name":"南昌市","parent":"15"}, -"157":{"name":"景德镇市","parent":"15"}, -"158":{"name":"萍乡市","parent":"15"}, -"159":{"name":"九江市","parent":"15"}, -"160":{"name":"新余市","parent":"15"}, -"161":{"name":"鹰潭市","parent":"15"}, -"162":{"name":"赣州市","parent":"15"}, -"163":{"name":"吉安市","parent":"15"}, -"164":{"name":"宜春市","parent":"15"}, -"165":{"name":"抚州市","parent":"15"}, -"166":{"name":"上饶市","parent":"15"}, -"167":{"name":"济南市","parent":"16"}, -"168":{"name":"青岛市","parent":"16"}, 
-"169":{"name":"淄博市","parent":"16"}, -"170":{"name":"枣庄市","parent":"16"}, -"171":{"name":"东营市","parent":"16"}, -"172":{"name":"烟台市","parent":"16"}, -"173":{"name":"潍坊市","parent":"16"}, -"174":{"name":"济宁市","parent":"16"}, -"175":{"name":"泰安市","parent":"16"}, -"176":{"name":"威海市","parent":"16"}, -"177":{"name":"日照市","parent":"16"}, -"179":{"name":"临沂市","parent":"16"}, -"180":{"name":"德州市","parent":"16"}, -"181":{"name":"聊城市","parent":"16"}, -"182":{"name":"滨州市","parent":"16"}, -"183":{"name":"菏泽市","parent":"16"}, -"184":{"name":"郑州市","parent":"17"}, -"185":{"name":"开封市","parent":"17"}, -"186":{"name":"洛阳市","parent":"17"}, -"187":{"name":"平顶山市","parent":"17"}, -"188":{"name":"安阳市","parent":"17"}, -"189":{"name":"鹤壁市","parent":"17"}, -"190":{"name":"新乡市","parent":"17"}, -"191":{"name":"焦作市","parent":"17"}, -"192":{"name":"濮阳市","parent":"17"}, -"193":{"name":"许昌市","parent":"17"}, -"194":{"name":"漯河市","parent":"17"}, -"195":{"name":"三门峡市","parent":"17"}, -"196":{"name":"南阳市","parent":"17"}, -"197":{"name":"商丘市","parent":"17"}, -"198":{"name":"信阳市","parent":"17"}, -"199":{"name":"周口市","parent":"17"}, -"200":{"name":"驻马店市","parent":"17"}, -"201":{"name":"武汉市","parent":"18"}, -"202":{"name":"黄石市","parent":"18"}, -"203":{"name":"十堰市","parent":"18"}, -"204":{"name":"宜昌市","parent":"18"}, -"205":{"name":"襄阳市","parent":"18"}, -"206":{"name":"鄂州市","parent":"18"}, -"207":{"name":"荆门市","parent":"18"}, -"208":{"name":"孝感市","parent":"18"}, -"209":{"name":"荆州市","parent":"18"}, -"210":{"name":"黄冈市","parent":"18"}, -"211":{"name":"咸宁市","parent":"18"}, -"212":{"name":"随州市","parent":"18"}, -"213":{"name":"恩施土家族苗族自治州","parent":"18"}, -"215":{"name":"长沙市","parent":"19"}, -"216":{"name":"株洲市","parent":"19"}, -"217":{"name":"湘潭市","parent":"19"}, -"218":{"name":"衡阳市","parent":"19"}, -"219":{"name":"邵阳市","parent":"19"}, -"220":{"name":"岳阳市","parent":"19"}, -"221":{"name":"常德市","parent":"19"}, -"222":{"name":"张家界市","parent":"19"}, -"223":{"name":"益阳市","parent":"19"}, 
-"224":{"name":"郴州市","parent":"19"}, -"225":{"name":"永州市","parent":"19"}, -"226":{"name":"怀化市","parent":"19"}, -"227":{"name":"娄底市","parent":"19"}, -"228":{"name":"湘西土家族苗族自治州","parent":"19"}, -"229":{"name":"广州市","parent":"20"}, -"230":{"name":"韶关市","parent":"20"}, -"231":{"name":"深圳市","parent":"20"}, -"232":{"name":"珠海市","parent":"20"}, -"233":{"name":"汕头市","parent":"20"}, -"234":{"name":"佛山市","parent":"20"}, -"235":{"name":"江门市","parent":"20"}, -"236":{"name":"湛江市","parent":"20"}, -"237":{"name":"茂名市","parent":"20"}, -"238":{"name":"肇庆市","parent":"20"}, -"239":{"name":"惠州市","parent":"20"}, -"240":{"name":"梅州市","parent":"20"}, -"241":{"name":"汕尾市","parent":"20"}, -"242":{"name":"河源市","parent":"20"}, -"243":{"name":"阳江市","parent":"20"}, -"244":{"name":"清远市","parent":"20"}, -"245":{"name":"东莞市","parent":"20"}, -"246":{"name":"中山市","parent":"20"}, -"247":{"name":"潮州市","parent":"20"}, -"248":{"name":"揭阳市","parent":"20"}, -"249":{"name":"云浮市","parent":"20"}, -"250":{"name":"南宁市","parent":"21"}, -"251":{"name":"柳州市","parent":"21"}, -"252":{"name":"桂林市","parent":"21"}, -"253":{"name":"梧州市","parent":"21"}, -"254":{"name":"北海市","parent":"21"}, -"255":{"name":"防城港市","parent":"21"}, -"256":{"name":"钦州市","parent":"21"}, -"257":{"name":"贵港市","parent":"21"}, -"258":{"name":"玉林市","parent":"21"}, -"259":{"name":"百色市","parent":"21"}, -"260":{"name":"贺州市","parent":"21"}, -"261":{"name":"河池市","parent":"21"}, -"262":{"name":"来宾市","parent":"21"}, -"263":{"name":"崇左市","parent":"21"}, -"264":{"name":"海口市","parent":"22"}, -"265":{"name":"三亚市","parent":"22"}, -"267":{"name":"重庆市","parent":"23"}, -"268":{"name":"成都市","parent":"24"}, -"269":{"name":"自贡市","parent":"24"}, -"270":{"name":"攀枝花市","parent":"24"}, -"271":{"name":"泸州市","parent":"24"}, -"272":{"name":"德阳市","parent":"24"}, -"273":{"name":"绵阳市","parent":"24"}, -"274":{"name":"广元市","parent":"24"}, -"275":{"name":"遂宁市","parent":"24"}, -"276":{"name":"内江市","parent":"24"}, -"277":{"name":"乐山市","parent":"24"}, 
-"278":{"name":"南充市","parent":"24"}, -"279":{"name":"眉山市","parent":"24"}, -"280":{"name":"宜宾市","parent":"24"}, -"281":{"name":"广安市","parent":"24"}, -"282":{"name":"达州市","parent":"24"}, -"283":{"name":"雅安市","parent":"24"}, -"284":{"name":"巴中市","parent":"24"}, -"285":{"name":"资阳市","parent":"24"}, -"286":{"name":"阿坝藏族羌族自治州","parent":"24"}, -"287":{"name":"甘孜藏族自治州","parent":"24"}, -"288":{"name":"凉山彝族自治州","parent":"24"}, -"289":{"name":"贵阳市","parent":"25"}, -"290":{"name":"六盘水市","parent":"25"}, -"291":{"name":"遵义市","parent":"25"}, -"292":{"name":"安顺市","parent":"25"}, -"293":{"name":"铜仁市","parent":"25"}, -"294":{"name":"黔西南布依族苗族自治州","parent":"25"}, -"295":{"name":"毕节市","parent":"25"}, -"296":{"name":"黔东南苗族侗族自治州","parent":"25"}, -"297":{"name":"黔南布依族苗族自治州","parent":"25"}, -"298":{"name":"昆明市","parent":"26"}, -"299":{"name":"曲靖市","parent":"26"}, -"300":{"name":"玉溪市","parent":"26"}, -"301":{"name":"保山市","parent":"26"}, -"302":{"name":"昭通市","parent":"26"}, -"303":{"name":"丽江市","parent":"26"}, -"304":{"name":"普洱市","parent":"26"}, -"305":{"name":"临沧市","parent":"26"}, -"306":{"name":"楚雄彝族自治州","parent":"26"}, -"307":{"name":"红河哈尼族彝族自治州","parent":"26"}, -"308":{"name":"文山壮族苗族自治州","parent":"26"}, -"309":{"name":"西双版纳傣族自治州","parent":"26"}, -"310":{"name":"大理白族自治州","parent":"26"}, -"311":{"name":"德宏傣族景颇族自治州","parent":"26"}, -"312":{"name":"怒江傈僳族自治州","parent":"26"}, -"313":{"name":"迪庆藏族自治州","parent":"26"}, -"314":{"name":"拉萨市","parent":"27"}, -"315":{"name":"昌都市","parent":"27"}, -"316":{"name":"山南市","parent":"27"}, -"317":{"name":"日喀则市","parent":"27"}, -"318":{"name":"那曲市","parent":"27"}, -"319":{"name":"阿里地区","parent":"27"}, -"320":{"name":"林芝市","parent":"27"}, -"321":{"name":"西安市","parent":"28"}, -"322":{"name":"铜川市","parent":"28"}, -"323":{"name":"宝鸡市","parent":"28"}, -"324":{"name":"咸阳市","parent":"28"}, -"325":{"name":"渭南市","parent":"28"}, -"326":{"name":"延安市","parent":"28"}, -"327":{"name":"汉中市","parent":"28"}, -"328":{"name":"榆林市","parent":"28"}, 
-"329":{"name":"安康市","parent":"28"}, -"330":{"name":"商洛市","parent":"28"}, -"331":{"name":"兰州市","parent":"29"}, -"332":{"name":"嘉峪关市","parent":"29"}, -"333":{"name":"金昌市","parent":"29"}, -"334":{"name":"白银市","parent":"29"}, -"335":{"name":"天水市","parent":"29"}, -"336":{"name":"武威市","parent":"29"}, -"337":{"name":"张掖市","parent":"29"}, -"338":{"name":"平凉市","parent":"29"}, -"339":{"name":"酒泉市","parent":"29"}, -"340":{"name":"庆阳市","parent":"29"}, -"341":{"name":"定西市","parent":"29"}, -"342":{"name":"陇南市","parent":"29"}, -"343":{"name":"临夏回族自治州","parent":"29"}, -"344":{"name":"甘南藏族自治州","parent":"29"}, -"345":{"name":"西宁市","parent":"30"}, -"346":{"name":"海东市","parent":"30"}, -"347":{"name":"海北藏族自治州","parent":"30"}, -"348":{"name":"黄南藏族自治州","parent":"30"}, -"349":{"name":"海南藏族自治州","parent":"30"}, -"350":{"name":"果洛藏族自治州","parent":"30"}, -"351":{"name":"玉树藏族自治州","parent":"30"}, -"352":{"name":"海西蒙古族藏族自治州","parent":"30"}, -"353":{"name":"银川市","parent":"31"}, -"354":{"name":"石嘴山市","parent":"31"}, -"355":{"name":"吴忠市","parent":"31"}, -"356":{"name":"固原市","parent":"31"}, -"357":{"name":"中卫市","parent":"31"}, -"358":{"name":"乌鲁木齐市","parent":"32"}, -"359":{"name":"克拉玛依市","parent":"32"}, -"360":{"name":"吐鲁番市","parent":"32"}, -"361":{"name":"哈密市","parent":"32"}, -"362":{"name":"昌吉回族自治州","parent":"32"}, -"363":{"name":"博尔塔拉蒙古自治州","parent":"32"}, -"364":{"name":"巴音郭楞蒙古自治州","parent":"32"}, -"365":{"name":"阿克苏地区","parent":"32"}, -"366":{"name":"克孜勒苏柯尔克孜自治州","parent":"32"}, -"367":{"name":"喀什地区","parent":"32"}, -"368":{"name":"和田地区","parent":"32"}, -"369":{"name":"伊犁哈萨克自治州","parent":"32"}, -"370":{"name":"塔城地区","parent":"32"}, -"371":{"name":"阿勒泰地区","parent":"32"}, -"372":{"name":"新疆省直辖行政单位","parent":"32"}, -"373":{"name":"可克达拉市","parent":"32"}, -"374":{"name":"昆玉市","parent":"32"}, -"375":{"name":"胡杨河市","parent":"32"}, -"376":{"name":"双河市","parent":"32"}, -"3560":{"name":"北票市","parent":"7"}, -"3615":{"name":"高州市","parent":"20"}, -"3651":{"name":"济源市","parent":"17"}, 
-"3662":{"name":"胶南市","parent":"16"}, -"3683":{"name":"老河口市","parent":"18"}, -"3758":{"name":"沙河市","parent":"4"}, -"3822":{"name":"宜城市","parent":"18"}, -"3842":{"name":"枣阳市","parent":"18"}, -"3850":{"name":"肇东市","parent":"9"}, -"3905":{"name":"澳门","parent":"1"}, -"3906":{"name":"澳门","parent":"3905"}, -"3907":{"name":"香港","parent":"1"}, -"3908":{"name":"香港","parent":"3907"}, -"3947":{"name":"仙桃市","parent":"18"}, -"3954":{"name":"台湾","parent":"1"}, -"3955":{"name":"台湾","parent":"3954"}, -"3956":{"name":"海外","parent":"1"}, -"3957":{"name":"海外","parent":"3956"}, -"3958":{"name":"美国","parent":"3956"}, -"3959":{"name":"加拿大","parent":"3956"}, -"3961":{"name":"日本","parent":"3956"}, -"3962":{"name":"韩国","parent":"3956"}, -"3963":{"name":"德国","parent":"3956"}, -"3964":{"name":"英国","parent":"3956"}, -"3965":{"name":"意大利","parent":"3956"}, -"3966":{"name":"西班牙","parent":"3956"}, -"3967":{"name":"法国","parent":"3956"}, -"3968":{"name":"澳大利亚","parent":"3956"}, -"3969":{"name":"东城区","parent":"2"}, -"3970":{"name":"西城区","parent":"2"}, -"3971":{"name":"崇文区","parent":"2"}, -"3972":{"name":"宣武区","parent":"2"}, -"3973":{"name":"朝阳区","parent":"2"}, -"3974":{"name":"海淀区","parent":"2"}, -"3975":{"name":"丰台区","parent":"2"}, -"3976":{"name":"石景山区","parent":"2"}, -"3977":{"name":"门头沟区","parent":"2"}, -"3978":{"name":"房山区","parent":"2"}, -"3979":{"name":"通州区","parent":"2"}, -"3980":{"name":"顺义区","parent":"2"}, -"3981":{"name":"昌平区","parent":"2"}, -"3982":{"name":"大兴区","parent":"2"}, -"3983":{"name":"平谷区","parent":"2"}, -"3984":{"name":"怀柔区","parent":"2"}, -"3985":{"name":"密云区","parent":"2"}, -"3986":{"name":"延庆区","parent":"2"}, -"3987":{"name":"黄浦区","parent":"10"}, -"3988":{"name":"徐汇区","parent":"10"}, -"3989":{"name":"长宁区","parent":"10"}, -"3990":{"name":"静安区","parent":"10"}, -"3991":{"name":"普陀区","parent":"10"}, -"3992":{"name":"闸北区","parent":"10"}, -"3993":{"name":"虹口区","parent":"10"}, -"3994":{"name":"杨浦区","parent":"10"}, -"3995":{"name":"宝山区","parent":"10"}, 
-"3996":{"name":"闵行区","parent":"10"}, -"3997":{"name":"嘉定区","parent":"10"}, -"3998":{"name":"浦东新区","parent":"10"}, -"3999":{"name":"松江区","parent":"10"}, -"4000":{"name":"金山区","parent":"10"}, -"4001":{"name":"青浦区","parent":"10"}, -"4002":{"name":"奉贤区","parent":"10"}, -"4003":{"name":"崇明区","parent":"10"}, -"4004":{"name":"和平区","parent":"3"}, -"4005":{"name":"河东区","parent":"3"}, -"4006":{"name":"河西区","parent":"3"}, -"4007":{"name":"南开区","parent":"3"}, -"4008":{"name":"红桥区","parent":"3"}, -"4009":{"name":"河北区","parent":"3"}, -"4010":{"name":"滨海新区","parent":"3"}, -"4011":{"name":"东丽区","parent":"3"}, -"4012":{"name":"西青区","parent":"3"}, -"4013":{"name":"北辰区","parent":"3"}, -"4014":{"name":"津南区","parent":"3"}, -"4015":{"name":"武清区","parent":"3"}, -"4016":{"name":"宝坻区","parent":"3"}, -"4017":{"name":"静海区","parent":"3"}, -"4018":{"name":"宁河区","parent":"3"}, -"4019":{"name":"蓟州区","parent":"3"}, -"4020":{"name":"渝中区","parent":"23"}, -"4021":{"name":"江北区","parent":"23"}, -"4022":{"name":"南岸区","parent":"23"}, -"4023":{"name":"沙坪坝区","parent":"23"}, -"4024":{"name":"九龙坡区","parent":"23"}, -"4025":{"name":"大渡口区","parent":"23"}, -"4026":{"name":"渝北区","parent":"23"}, -"4027":{"name":"巴南区","parent":"23"}, -"4028":{"name":"北碚区","parent":"23"}, -"4029":{"name":"万州区","parent":"23"}, -"4030":{"name":"黔江区","parent":"23"}, -"4031":{"name":"永川区","parent":"23"}, -"4032":{"name":"涪陵区","parent":"23"}, -"4033":{"name":"江津区","parent":"23"}, -"4034":{"name":"合川区","parent":"23"}, -"4035":{"name":"双桥区","parent":"23"}, -"4036":{"name":"万盛区","parent":"23"}, -"4037":{"name":"荣昌区","parent":"23"}, -"4038":{"name":"大足区","parent":"23"}, -"4039":{"name":"璧山区","parent":"23"}, -"4040":{"name":"铜梁区","parent":"23"}, -"4041":{"name":"潼南区","parent":"23"}, -"4042":{"name":"綦江区","parent":"23"}, -"4043":{"name":"忠县","parent":"23"}, -"4044":{"name":"开州区","parent":"23"}, -"4045":{"name":"云阳县","parent":"23"}, -"4046":{"name":"梁平区","parent":"23"}, -"4047":{"name":"垫江县","parent":"23"}, 
-"4048":{"name":"丰都县","parent":"23"}, -"4049":{"name":"奉节县","parent":"23"}, -"4050":{"name":"巫山县","parent":"23"}, -"4051":{"name":"巫溪县","parent":"23"}, -"4052":{"name":"城口县","parent":"23"}, -"4053":{"name":"武隆区","parent":"23"}, -"4054":{"name":"石柱土家族自治县","parent":"23"}, -"4055":{"name":"秀山土家族苗族自治县","parent":"23"}, -"4056":{"name":"酉阳土家族苗族自治县","parent":"23"}, -"4057":{"name":"彭水苗族土家族自治县","parent":"23"}, -"4058":{"name":"潜江市","parent":"18"}, -"4059":{"name":"三沙市","parent":"22"}, -"4060":{"name":"石河子市","parent":"32"}, -"4061":{"name":"阿拉尔市","parent":"32"}, -"4062":{"name":"图木舒克市","parent":"32"}, -"4063":{"name":"五家渠市","parent":"32"}, -"4064":{"name":"北屯市","parent":"32"}, -"4065":{"name":"铁门关市","parent":"32"}, -"4066":{"name":"儋州市","parent":"22"}, -"4067":{"name":"五指山市","parent":"22"}, -"4068":{"name":"文昌市","parent":"22"}, -"4069":{"name":"琼海市","parent":"22"}, -"4070":{"name":"万宁市","parent":"22"}, -"4072":{"name":"定安县","parent":"22"}, -"4073":{"name":"屯昌县","parent":"22"}, -"4074":{"name":"澄迈县","parent":"22"}, -"4075":{"name":"临高县","parent":"22"}, -"4076":{"name":"琼中黎族苗族自治县","parent":"22"}, -"4077":{"name":"保亭黎族苗族自治县","parent":"22"}, -"4078":{"name":"白沙黎族自治县","parent":"22"}, -"4079":{"name":"昌江黎族自治县","parent":"22"}, -"4080":{"name":"乐东黎族自治县","parent":"22"}, -"4081":{"name":"陵水黎族自治县","parent":"22"}, -"4082":{"name":"马来西亚","parent":"3956"}, -"6047":{"name":"长寿区","parent":"23"}, -"6857":{"name":"阿富汗","parent":"3956"}, -"6858":{"name":"阿尔巴尼亚","parent":"3956"}, -"6859":{"name":"阿尔及利亚","parent":"3956"}, -"6860":{"name":"美属萨摩亚","parent":"3956"}, -"6861":{"name":"安道尔","parent":"3956"}, -"6862":{"name":"安哥拉","parent":"3956"}, -"6863":{"name":"安圭拉","parent":"3956"}, -"6864":{"name":"南极洲","parent":"3956"}, -"6865":{"name":"安提瓜和巴布达","parent":"3956"}, -"6866":{"name":"阿根廷","parent":"3956"}, -"6867":{"name":"亚美尼亚","parent":"3956"}, -"6869":{"name":"奥地利","parent":"3956"}, -"6870":{"name":"阿塞拜疆","parent":"3956"}, -"6871":{"name":"巴哈马","parent":"3956"}, 
-"6872":{"name":"巴林","parent":"3956"}, -"6873":{"name":"孟加拉国","parent":"3956"}, -"6874":{"name":"巴巴多斯","parent":"3956"}, -"6875":{"name":"白俄罗斯","parent":"3956"}, -"6876":{"name":"比利时","parent":"3956"}, -"6877":{"name":"伯利兹","parent":"3956"}, -"6878":{"name":"贝宁","parent":"3956"}, -"6879":{"name":"百慕大","parent":"3956"}, -"6880":{"name":"不丹","parent":"3956"}, -"6881":{"name":"玻利维亚","parent":"3956"}, -"6882":{"name":"波黑","parent":"3956"}, -"6883":{"name":"博茨瓦纳","parent":"3956"}, -"6884":{"name":"布维岛","parent":"3956"}, -"6885":{"name":"巴西","parent":"3956"}, -"6886":{"name":"英属印度洋领土","parent":"3956"}, -"6887":{"name":"文莱","parent":"3956"}, -"6888":{"name":"保加利亚","parent":"3956"}, -"6889":{"name":"布基纳法索","parent":"3956"}, -"6890":{"name":"布隆迪","parent":"3956"}, -"6891":{"name":"柬埔寨","parent":"3956"}, -"6892":{"name":"喀麦隆","parent":"3956"}, -"6893":{"name":"佛得角","parent":"3956"}, -"6894":{"name":"开曼群岛","parent":"3956"}, -"6895":{"name":"中非","parent":"3956"}, -"6896":{"name":"乍得","parent":"3956"}, -"6897":{"name":"智利","parent":"3956"}, -"6898":{"name":"圣诞岛","parent":"3956"}, -"6899":{"name":"科科斯(基林)群岛","parent":"3956"}, -"6900":{"name":"哥伦比亚","parent":"3956"}, -"6901":{"name":"科摩罗","parent":"3956"}, -"6902":{"name":"刚果(布)","parent":"3956"}, -"6903":{"name":"刚果(金)","parent":"3956"}, -"6904":{"name":"库克群岛","parent":"3956"}, -"6905":{"name":"哥斯达黎加","parent":"3956"}, -"6906":{"name":"科特迪瓦","parent":"3956"}, -"6907":{"name":"克罗地亚","parent":"3956"}, -"6908":{"name":"古巴","parent":"3956"}, -"6909":{"name":"塞浦路斯","parent":"3956"}, -"6910":{"name":"捷克","parent":"3956"}, -"6911":{"name":"丹麦","parent":"3956"}, -"6912":{"name":"吉布提","parent":"3956"}, -"6913":{"name":"多米尼克","parent":"3956"}, -"6914":{"name":"多米尼加共和国","parent":"3956"}, -"6915":{"name":"东帝汶","parent":"3956"}, -"6916":{"name":"厄瓜多尔","parent":"3956"}, -"6917":{"name":"埃及","parent":"3956"}, -"6918":{"name":"萨尔瓦多","parent":"3956"}, -"6919":{"name":"赤道几内亚","parent":"3956"}, -"6920":{"name":"厄立特里亚","parent":"3956"}, 
-"6921":{"name":"爱沙尼亚","parent":"3956"}, -"6922":{"name":"埃塞俄比亚","parent":"3956"}, -"6923":{"name":"福克兰群岛(马尔维纳斯)","parent":"3956"}, -"6924":{"name":"法罗群岛","parent":"3956"}, -"6925":{"name":"斐济","parent":"3956"}, -"6926":{"name":"芬兰","parent":"3956"}, -"6927":{"name":"法属圭亚那","parent":"3956"}, -"6928":{"name":"法属波利尼西亚","parent":"3956"}, -"6929":{"name":"法属南部领土","parent":"3956"}, -"6930":{"name":"加蓬","parent":"3956"}, -"6931":{"name":"冈比亚","parent":"3956"}, -"6932":{"name":"格鲁吉亚","parent":"3956"}, -"6933":{"name":"加纳","parent":"3956"}, -"6934":{"name":"直布罗陀","parent":"3956"}, -"6935":{"name":"希腊","parent":"3956"}, -"6936":{"name":"格陵兰","parent":"3956"}, -"6937":{"name":"格林纳达","parent":"3956"}, -"6938":{"name":"瓜德罗普","parent":"3956"}, -"6939":{"name":"关岛","parent":"3956"}, -"6940":{"name":"危地马拉","parent":"3956"}, -"6941":{"name":"几内亚","parent":"3956"}, -"6942":{"name":"几内亚比绍","parent":"3956"}, -"6943":{"name":"圭亚那","parent":"3956"}, -"6944":{"name":"海地","parent":"3956"}, -"6945":{"name":"赫德岛和麦克唐纳岛","parent":"3956"}, -"6946":{"name":"洪都拉斯","parent":"3956"}, -"6947":{"name":"匈牙利","parent":"3956"}, -"6948":{"name":"冰岛","parent":"3956"}, -"6949":{"name":"印度","parent":"3956"}, -"6950":{"name":"印度尼西亚","parent":"3956"}, -"6951":{"name":"伊朗","parent":"3956"}, -"6952":{"name":"伊拉克","parent":"3956"}, -"6953":{"name":"爱尔兰","parent":"3956"}, -"6954":{"name":"以色列","parent":"3956"}, -"6955":{"name":"牙买加","parent":"3956"}, -"6956":{"name":"约旦","parent":"3956"}, -"6957":{"name":"哈萨克斯坦","parent":"3956"}, -"6958":{"name":"肯尼亚","parent":"3956"}, -"6959":{"name":"基里巴斯","parent":"3956"}, -"6960":{"name":"朝鲜","parent":"3956"}, -"6961":{"name":"科威特","parent":"3956"}, -"6962":{"name":"吉尔吉斯斯坦","parent":"3956"}, -"6963":{"name":"老挝","parent":"3956"}, -"6964":{"name":"拉脱维亚","parent":"3956"}, -"6965":{"name":"黎巴嫩","parent":"3956"}, -"6966":{"name":"莱索托","parent":"3956"}, -"6967":{"name":"利比里亚","parent":"3956"}, -"6968":{"name":"利比亚","parent":"3956"}, -"6969":{"name":"列支敦士登","parent":"3956"}, 
-"6970":{"name":"立陶宛","parent":"3956"}, -"6971":{"name":"卢森堡","parent":"3956"}, -"6972":{"name":"前南马其顿","parent":"3956"}, -"6973":{"name":"马达加斯加","parent":"3956"}, -"6974":{"name":"马拉维","parent":"3956"}, -"6975":{"name":"马尔代夫","parent":"3956"}, -"6976":{"name":"马里","parent":"3956"}, -"6977":{"name":"马耳他","parent":"3956"}, -"6978":{"name":"马绍尔群岛","parent":"3956"}, -"6979":{"name":"马提尼克","parent":"3956"}, -"6980":{"name":"毛里塔尼亚","parent":"3956"}, -"6981":{"name":"毛里求斯","parent":"3956"}, -"6982":{"name":"马约特","parent":"3956"}, -"6983":{"name":"墨西哥","parent":"3956"}, -"6984":{"name":"密克罗尼西亚联邦","parent":"3956"}, -"6985":{"name":"摩尔多瓦","parent":"3956"}, -"6986":{"name":"摩纳哥","parent":"3956"}, -"6987":{"name":"蒙古","parent":"3956"}, -"6988":{"name":"蒙特塞拉特","parent":"3956"}, -"6989":{"name":"摩洛哥","parent":"3956"}, -"6990":{"name":"莫桑比克","parent":"3956"}, -"6991":{"name":"缅甸","parent":"3956"}, -"6992":{"name":"纳米比亚","parent":"3956"}, -"6993":{"name":"瑙鲁","parent":"3956"}, -"6994":{"name":"尼泊尔","parent":"3956"}, -"6995":{"name":"荷兰","parent":"3956"}, -"6996":{"name":"荷属安的列斯","parent":"3956"}, -"6997":{"name":"新喀里多尼亚","parent":"3956"}, -"6998":{"name":"新西兰","parent":"3956"}, -"6999":{"name":"尼加拉瓜","parent":"3956"}, -"7000":{"name":"尼日尔","parent":"3956"}, -"7001":{"name":"尼日利亚","parent":"3956"}, -"7002":{"name":"纽埃","parent":"3956"}, -"7003":{"name":"诺福克岛","parent":"3956"}, -"7004":{"name":"北马里亚纳","parent":"3956"}, -"7005":{"name":"挪威","parent":"3956"}, -"7006":{"name":"阿曼","parent":"3956"}, -"7007":{"name":"巴基斯坦","parent":"3956"}, -"7008":{"name":"帕劳","parent":"3956"}, -"7009":{"name":"巴勒斯坦","parent":"3956"}, -"7010":{"name":"巴拿马","parent":"3956"}, -"7011":{"name":"巴布亚新几内亚","parent":"3956"}, -"7012":{"name":"巴拉圭","parent":"3956"}, -"7013":{"name":"秘鲁","parent":"3956"}, -"7014":{"name":"菲律宾","parent":"3956"}, -"7015":{"name":"皮特凯恩群岛","parent":"3956"}, -"7016":{"name":"波兰","parent":"3956"}, -"7017":{"name":"葡萄牙","parent":"3956"}, -"7018":{"name":"波多黎各","parent":"3956"}, 
-"7019":{"name":"卡塔尔","parent":"3956"}, -"7020":{"name":"留尼汪","parent":"3956"}, -"7021":{"name":"罗马尼亚","parent":"3956"}, -"7022":{"name":"俄罗斯联邦","parent":"3956"}, -"7023":{"name":"卢旺达","parent":"3956"}, -"7024":{"name":"圣赫勒拿","parent":"3956"}, -"7025":{"name":"圣基茨和尼维斯","parent":"3956"}, -"7026":{"name":"圣卢西亚","parent":"3956"}, -"7027":{"name":"圣皮埃尔和密克隆","parent":"3956"}, -"7028":{"name":"圣文森特和格林纳丁斯","parent":"3956"}, -"7029":{"name":"萨摩亚","parent":"3956"}, -"7030":{"name":"圣马力诺","parent":"3956"}, -"7031":{"name":"圣多美和普林西比","parent":"3956"}, -"7032":{"name":"沙特阿拉伯","parent":"3956"}, -"7033":{"name":"塞内加尔","parent":"3956"}, -"7034":{"name":"塞舌尔","parent":"3956"}, -"7035":{"name":"塞拉利昂","parent":"3956"}, -"7036":{"name":"新加坡","parent":"3956"}, -"7037":{"name":"斯洛伐克","parent":"3956"}, -"7038":{"name":"斯洛文尼亚","parent":"3956"}, -"7039":{"name":"所罗门群岛","parent":"3956"}, -"7040":{"name":"索马里","parent":"3956"}, -"7041":{"name":"南非","parent":"3956"}, -"7042":{"name":"南乔治亚岛和南桑德韦奇岛","parent":"3956"}, -"7043":{"name":"斯里兰卡","parent":"3956"}, -"7044":{"name":"苏丹","parent":"3956"}, -"7045":{"name":"苏里南","parent":"3956"}, -"7046":{"name":"斯瓦尔巴群岛","parent":"3956"}, -"7047":{"name":"斯威士兰","parent":"3956"}, -"7048":{"name":"瑞典","parent":"3956"}, -"7049":{"name":"瑞士","parent":"3956"}, -"7050":{"name":"叙利亚","parent":"3956"}, -"7051":{"name":"塔吉克斯坦","parent":"3956"}, -"7052":{"name":"坦桑尼亚","parent":"3956"}, -"7053":{"name":"泰国","parent":"3956"}, -"7054":{"name":"多哥","parent":"3956"}, -"7055":{"name":"托克劳","parent":"3956"}, -"7056":{"name":"汤加","parent":"3956"}, -"7057":{"name":"特立尼达和多巴哥","parent":"3956"}, -"7058":{"name":"突尼斯","parent":"3956"}, -"7059":{"name":"土耳其","parent":"3956"}, -"7060":{"name":"土库曼斯坦","parent":"3956"}, -"7061":{"name":"特克斯科斯群岛","parent":"3956"}, -"7062":{"name":"图瓦卢","parent":"3956"}, -"7063":{"name":"乌干达","parent":"3956"}, -"7064":{"name":"乌克兰","parent":"3956"}, -"7065":{"name":"阿联酋","parent":"3956"}, -"7066":{"name":"美国本土外小岛屿","parent":"3956"}, 
-"7067":{"name":"乌拉圭","parent":"3956"}, -"7068":{"name":"乌兹别克斯坦","parent":"3956"}, -"7069":{"name":"瓦努阿图","parent":"3956"}, -"7070":{"name":"梵蒂冈","parent":"3956"}, -"7071":{"name":"委内瑞拉","parent":"3956"}, -"7072":{"name":"越南","parent":"3956"}, -"7073":{"name":"英属维尔京群岛","parent":"3956"}, -"7074":{"name":"美属维尔京群岛","parent":"3956"}, -"7075":{"name":"瓦利斯和富图纳","parent":"3956"}, -"7076":{"name":"西撒哈拉","parent":"3956"}, -"7077":{"name":"也门","parent":"3956"}, -"7078":{"name":"南斯拉夫","parent":"3956"}, -"7079":{"name":"赞比亚","parent":"3956"}, -"7080":{"name":"津巴布韦","parent":"3956"}, -"7081":{"name":"塞尔维亚","parent":"3956"}, -"7082":{"name":"雄安新区","parent":"4"}, -"7084":{"name":"天门市","parent":"18"} + "2": {"name": "北京", "parent": "1"}, + "3": {"name": "天津", "parent": "1"}, + "4": {"name": "河北", "parent": "1"}, + "5": {"name": "山西", "parent": "1"}, + "6": {"name": "内蒙古", "parent": "1"}, + "7": {"name": "辽宁", "parent": "1"}, + "8": {"name": "吉林", "parent": "1"}, + "9": {"name": "黑龙江", "parent": "1"}, + "10": {"name": "上海", "parent": "1"}, + "11": {"name": "江苏", "parent": "1"}, + "12": {"name": "浙江", "parent": "1"}, + "13": {"name": "安徽", "parent": "1"}, + "14": {"name": "福建", "parent": "1"}, + "15": {"name": "江西", "parent": "1"}, + "16": {"name": "山东", "parent": "1"}, + "17": {"name": "河南", "parent": "1"}, + "18": {"name": "湖北", "parent": "1"}, + "19": {"name": "湖南", "parent": "1"}, + "20": {"name": "广东", "parent": "1"}, + "21": {"name": "广西", "parent": "1"}, + "22": {"name": "海南", "parent": "1"}, + "23": {"name": "重庆", "parent": "1"}, + "24": {"name": "四川", "parent": "1"}, + "25": {"name": "贵州", "parent": "1"}, + "26": {"name": "云南", "parent": "1"}, + "27": {"name": "西藏", "parent": "1"}, + "28": {"name": "陕西", "parent": "1"}, + "29": {"name": "甘肃", "parent": "1"}, + "30": {"name": "青海", "parent": "1"}, + "31": {"name": "宁夏", "parent": "1"}, + "32": {"name": "新疆", "parent": "1"}, + "33": {"name": "北京市", "parent": "2"}, + "34": {"name": "天津市", "parent": "3"}, + "35": {"name": 
"石家庄市", "parent": "4"}, + "36": {"name": "唐山市", "parent": "4"}, + "37": {"name": "秦皇岛市", "parent": "4"}, + "38": {"name": "邯郸市", "parent": "4"}, + "39": {"name": "邢台市", "parent": "4"}, + "40": {"name": "保定市", "parent": "4"}, + "41": {"name": "张家口市", "parent": "4"}, + "42": {"name": "承德市", "parent": "4"}, + "43": {"name": "沧州市", "parent": "4"}, + "44": {"name": "廊坊市", "parent": "4"}, + "45": {"name": "衡水市", "parent": "4"}, + "46": {"name": "太原市", "parent": "5"}, + "47": {"name": "大同市", "parent": "5"}, + "48": {"name": "阳泉市", "parent": "5"}, + "49": {"name": "长治市", "parent": "5"}, + "50": {"name": "晋城市", "parent": "5"}, + "51": {"name": "朔州市", "parent": "5"}, + "52": {"name": "晋中市", "parent": "5"}, + "53": {"name": "运城市", "parent": "5"}, + "54": {"name": "忻州市", "parent": "5"}, + "55": {"name": "临汾市", "parent": "5"}, + "56": {"name": "吕梁市", "parent": "5"}, + "57": {"name": "呼和浩特市", "parent": "6"}, + "58": {"name": "包头市", "parent": "6"}, + "59": {"name": "乌海市", "parent": "6"}, + "60": {"name": "赤峰市", "parent": "6"}, + "61": {"name": "通辽市", "parent": "6"}, + "62": {"name": "鄂尔多斯市", "parent": "6"}, + "63": {"name": "呼伦贝尔市", "parent": "6"}, + "64": {"name": "巴彦淖尔市", "parent": "6"}, + "65": {"name": "乌兰察布市", "parent": "6"}, + "66": {"name": "兴安盟", "parent": "6"}, + "67": {"name": "锡林郭勒盟", "parent": "6"}, + "68": {"name": "阿拉善盟", "parent": "6"}, + "69": {"name": "沈阳市", "parent": "7"}, + "70": {"name": "大连市", "parent": "7"}, + "71": {"name": "鞍山市", "parent": "7"}, + "72": {"name": "抚顺市", "parent": "7"}, + "73": {"name": "本溪市", "parent": "7"}, + "74": {"name": "丹东市", "parent": "7"}, + "75": {"name": "锦州市", "parent": "7"}, + "76": {"name": "营口市", "parent": "7"}, + "77": {"name": "阜新市", "parent": "7"}, + "78": {"name": "辽阳市", "parent": "7"}, + "79": {"name": "盘锦市", "parent": "7"}, + "80": {"name": "铁岭市", "parent": "7"}, + "81": {"name": "朝阳市", "parent": "7"}, + "82": {"name": "葫芦岛市", "parent": "7"}, + "83": {"name": "长春市", "parent": "8"}, + "84": {"name": "吉林市", "parent": "8"}, 
+ "85": {"name": "四平市", "parent": "8"}, + "86": {"name": "辽源市", "parent": "8"}, + "87": {"name": "通化市", "parent": "8"}, + "88": {"name": "白山市", "parent": "8"}, + "89": {"name": "松原市", "parent": "8"}, + "90": {"name": "白城市", "parent": "8"}, + "91": {"name": "延边朝鲜族自治州", "parent": "8"}, + "92": {"name": "哈尔滨市", "parent": "9"}, + "93": {"name": "齐齐哈尔市", "parent": "9"}, + "94": {"name": "鸡西市", "parent": "9"}, + "95": {"name": "鹤岗市", "parent": "9"}, + "96": {"name": "双鸭山市", "parent": "9"}, + "97": {"name": "大庆市", "parent": "9"}, + "98": {"name": "伊春市", "parent": "9"}, + "99": {"name": "佳木斯市", "parent": "9"}, + "100": {"name": "七台河市", "parent": "9"}, + "101": {"name": "牡丹江市", "parent": "9"}, + "102": {"name": "黑河市", "parent": "9"}, + "103": {"name": "绥化市", "parent": "9"}, + "104": {"name": "大兴安岭地区", "parent": "9"}, + "105": {"name": "上海市", "parent": "10"}, + "106": {"name": "南京市", "parent": "11"}, + "107": {"name": "无锡市", "parent": "11"}, + "108": {"name": "徐州市", "parent": "11"}, + "109": {"name": "常州市", "parent": "11"}, + "110": {"name": "苏州市", "parent": "11"}, + "111": {"name": "南通市", "parent": "11"}, + "112": {"name": "连云港市", "parent": "11"}, + "113": {"name": "淮安市", "parent": "11"}, + "114": {"name": "盐城市", "parent": "11"}, + "115": {"name": "扬州市", "parent": "11"}, + "116": {"name": "镇江市", "parent": "11"}, + "117": {"name": "泰州市", "parent": "11"}, + "118": {"name": "宿迁市", "parent": "11"}, + "119": {"name": "杭州市", "parent": "12"}, + "120": {"name": "宁波市", "parent": "12"}, + "121": {"name": "温州市", "parent": "12"}, + "122": {"name": "嘉兴市", "parent": "12"}, + "123": {"name": "湖州市", "parent": "12"}, + "124": {"name": "绍兴市", "parent": "12"}, + "125": {"name": "金华市", "parent": "12"}, + "126": {"name": "衢州市", "parent": "12"}, + "127": {"name": "舟山市", "parent": "12"}, + "128": {"name": "台州市", "parent": "12"}, + "129": {"name": "丽水市", "parent": "12"}, + "130": {"name": "合肥市", "parent": "13"}, + "131": {"name": "芜湖市", "parent": "13"}, + "132": {"name": "蚌埠市", "parent": "13"}, + 
"133": {"name": "淮南市", "parent": "13"}, + "134": {"name": "马鞍山市", "parent": "13"}, + "135": {"name": "淮北市", "parent": "13"}, + "136": {"name": "铜陵市", "parent": "13"}, + "137": {"name": "安庆市", "parent": "13"}, + "138": {"name": "黄山市", "parent": "13"}, + "139": {"name": "滁州市", "parent": "13"}, + "140": {"name": "阜阳市", "parent": "13"}, + "141": {"name": "宿州市", "parent": "13"}, + "143": {"name": "六安市", "parent": "13"}, + "144": {"name": "亳州市", "parent": "13"}, + "145": {"name": "池州市", "parent": "13"}, + "146": {"name": "宣城市", "parent": "13"}, + "147": {"name": "福州市", "parent": "14"}, + "148": {"name": "厦门市", "parent": "14"}, + "149": {"name": "莆田市", "parent": "14"}, + "150": {"name": "三明市", "parent": "14"}, + "151": {"name": "泉州市", "parent": "14"}, + "152": {"name": "漳州市", "parent": "14"}, + "153": {"name": "南平市", "parent": "14"}, + "154": {"name": "龙岩市", "parent": "14"}, + "155": {"name": "宁德市", "parent": "14"}, + "156": {"name": "南昌市", "parent": "15"}, + "157": {"name": "景德镇市", "parent": "15"}, + "158": {"name": "萍乡市", "parent": "15"}, + "159": {"name": "九江市", "parent": "15"}, + "160": {"name": "新余市", "parent": "15"}, + "161": {"name": "鹰潭市", "parent": "15"}, + "162": {"name": "赣州市", "parent": "15"}, + "163": {"name": "吉安市", "parent": "15"}, + "164": {"name": "宜春市", "parent": "15"}, + "165": {"name": "抚州市", "parent": "15"}, + "166": {"name": "上饶市", "parent": "15"}, + "167": {"name": "济南市", "parent": "16"}, + "168": {"name": "青岛市", "parent": "16"}, + "169": {"name": "淄博市", "parent": "16"}, + "170": {"name": "枣庄市", "parent": "16"}, + "171": {"name": "东营市", "parent": "16"}, + "172": {"name": "烟台市", "parent": "16"}, + "173": {"name": "潍坊市", "parent": "16"}, + "174": {"name": "济宁市", "parent": "16"}, + "175": {"name": "泰安市", "parent": "16"}, + "176": {"name": "威海市", "parent": "16"}, + "177": {"name": "日照市", "parent": "16"}, + "179": {"name": "临沂市", "parent": "16"}, + "180": {"name": "德州市", "parent": "16"}, + "181": {"name": "聊城市", "parent": "16"}, + "182": {"name": "滨州市", 
"parent": "16"}, + "183": {"name": "菏泽市", "parent": "16"}, + "184": {"name": "郑州市", "parent": "17"}, + "185": {"name": "开封市", "parent": "17"}, + "186": {"name": "洛阳市", "parent": "17"}, + "187": {"name": "平顶山市", "parent": "17"}, + "188": {"name": "安阳市", "parent": "17"}, + "189": {"name": "鹤壁市", "parent": "17"}, + "190": {"name": "新乡市", "parent": "17"}, + "191": {"name": "焦作市", "parent": "17"}, + "192": {"name": "濮阳市", "parent": "17"}, + "193": {"name": "许昌市", "parent": "17"}, + "194": {"name": "漯河市", "parent": "17"}, + "195": {"name": "三门峡市", "parent": "17"}, + "196": {"name": "南阳市", "parent": "17"}, + "197": {"name": "商丘市", "parent": "17"}, + "198": {"name": "信阳市", "parent": "17"}, + "199": {"name": "周口市", "parent": "17"}, + "200": {"name": "驻马店市", "parent": "17"}, + "201": {"name": "武汉市", "parent": "18"}, + "202": {"name": "黄石市", "parent": "18"}, + "203": {"name": "十堰市", "parent": "18"}, + "204": {"name": "宜昌市", "parent": "18"}, + "205": {"name": "襄阳市", "parent": "18"}, + "206": {"name": "鄂州市", "parent": "18"}, + "207": {"name": "荆门市", "parent": "18"}, + "208": {"name": "孝感市", "parent": "18"}, + "209": {"name": "荆州市", "parent": "18"}, + "210": {"name": "黄冈市", "parent": "18"}, + "211": {"name": "咸宁市", "parent": "18"}, + "212": {"name": "随州市", "parent": "18"}, + "213": {"name": "恩施土家族苗族自治州", "parent": "18"}, + "215": {"name": "长沙市", "parent": "19"}, + "216": {"name": "株洲市", "parent": "19"}, + "217": {"name": "湘潭市", "parent": "19"}, + "218": {"name": "衡阳市", "parent": "19"}, + "219": {"name": "邵阳市", "parent": "19"}, + "220": {"name": "岳阳市", "parent": "19"}, + "221": {"name": "常德市", "parent": "19"}, + "222": {"name": "张家界市", "parent": "19"}, + "223": {"name": "益阳市", "parent": "19"}, + "224": {"name": "郴州市", "parent": "19"}, + "225": {"name": "永州市", "parent": "19"}, + "226": {"name": "怀化市", "parent": "19"}, + "227": {"name": "娄底市", "parent": "19"}, + "228": {"name": "湘西土家族苗族自治州", "parent": "19"}, + "229": {"name": "广州市", "parent": "20"}, + "230": {"name": "韶关市", 
"parent": "20"}, + "231": {"name": "深圳市", "parent": "20"}, + "232": {"name": "珠海市", "parent": "20"}, + "233": {"name": "汕头市", "parent": "20"}, + "234": {"name": "佛山市", "parent": "20"}, + "235": {"name": "江门市", "parent": "20"}, + "236": {"name": "湛江市", "parent": "20"}, + "237": {"name": "茂名市", "parent": "20"}, + "238": {"name": "肇庆市", "parent": "20"}, + "239": {"name": "惠州市", "parent": "20"}, + "240": {"name": "梅州市", "parent": "20"}, + "241": {"name": "汕尾市", "parent": "20"}, + "242": {"name": "河源市", "parent": "20"}, + "243": {"name": "阳江市", "parent": "20"}, + "244": {"name": "清远市", "parent": "20"}, + "245": {"name": "东莞市", "parent": "20"}, + "246": {"name": "中山市", "parent": "20"}, + "247": {"name": "潮州市", "parent": "20"}, + "248": {"name": "揭阳市", "parent": "20"}, + "249": {"name": "云浮市", "parent": "20"}, + "250": {"name": "南宁市", "parent": "21"}, + "251": {"name": "柳州市", "parent": "21"}, + "252": {"name": "桂林市", "parent": "21"}, + "253": {"name": "梧州市", "parent": "21"}, + "254": {"name": "北海市", "parent": "21"}, + "255": {"name": "防城港市", "parent": "21"}, + "256": {"name": "钦州市", "parent": "21"}, + "257": {"name": "贵港市", "parent": "21"}, + "258": {"name": "玉林市", "parent": "21"}, + "259": {"name": "百色市", "parent": "21"}, + "260": {"name": "贺州市", "parent": "21"}, + "261": {"name": "河池市", "parent": "21"}, + "262": {"name": "来宾市", "parent": "21"}, + "263": {"name": "崇左市", "parent": "21"}, + "264": {"name": "海口市", "parent": "22"}, + "265": {"name": "三亚市", "parent": "22"}, + "267": {"name": "重庆市", "parent": "23"}, + "268": {"name": "成都市", "parent": "24"}, + "269": {"name": "自贡市", "parent": "24"}, + "270": {"name": "攀枝花市", "parent": "24"}, + "271": {"name": "泸州市", "parent": "24"}, + "272": {"name": "德阳市", "parent": "24"}, + "273": {"name": "绵阳市", "parent": "24"}, + "274": {"name": "广元市", "parent": "24"}, + "275": {"name": "遂宁市", "parent": "24"}, + "276": {"name": "内江市", "parent": "24"}, + "277": {"name": "乐山市", "parent": "24"}, + "278": {"name": "南充市", "parent": "24"}, + 
"279": {"name": "眉山市", "parent": "24"}, + "280": {"name": "宜宾市", "parent": "24"}, + "281": {"name": "广安市", "parent": "24"}, + "282": {"name": "达州市", "parent": "24"}, + "283": {"name": "雅安市", "parent": "24"}, + "284": {"name": "巴中市", "parent": "24"}, + "285": {"name": "资阳市", "parent": "24"}, + "286": {"name": "阿坝藏族羌族自治州", "parent": "24"}, + "287": {"name": "甘孜藏族自治州", "parent": "24"}, + "288": {"name": "凉山彝族自治州", "parent": "24"}, + "289": {"name": "贵阳市", "parent": "25"}, + "290": {"name": "六盘水市", "parent": "25"}, + "291": {"name": "遵义市", "parent": "25"}, + "292": {"name": "安顺市", "parent": "25"}, + "293": {"name": "铜仁市", "parent": "25"}, + "294": {"name": "黔西南布依族苗族自治州", "parent": "25"}, + "295": {"name": "毕节市", "parent": "25"}, + "296": {"name": "黔东南苗族侗族自治州", "parent": "25"}, + "297": {"name": "黔南布依族苗族自治州", "parent": "25"}, + "298": {"name": "昆明市", "parent": "26"}, + "299": {"name": "曲靖市", "parent": "26"}, + "300": {"name": "玉溪市", "parent": "26"}, + "301": {"name": "保山市", "parent": "26"}, + "302": {"name": "昭通市", "parent": "26"}, + "303": {"name": "丽江市", "parent": "26"}, + "304": {"name": "普洱市", "parent": "26"}, + "305": {"name": "临沧市", "parent": "26"}, + "306": {"name": "楚雄彝族自治州", "parent": "26"}, + "307": {"name": "红河哈尼族彝族自治州", "parent": "26"}, + "308": {"name": "文山壮族苗族自治州", "parent": "26"}, + "309": {"name": "西双版纳傣族自治州", "parent": "26"}, + "310": {"name": "大理白族自治州", "parent": "26"}, + "311": {"name": "德宏傣族景颇族自治州", "parent": "26"}, + "312": {"name": "怒江傈僳族自治州", "parent": "26"}, + "313": {"name": "迪庆藏族自治州", "parent": "26"}, + "314": {"name": "拉萨市", "parent": "27"}, + "315": {"name": "昌都市", "parent": "27"}, + "316": {"name": "山南市", "parent": "27"}, + "317": {"name": "日喀则市", "parent": "27"}, + "318": {"name": "那曲市", "parent": "27"}, + "319": {"name": "阿里地区", "parent": "27"}, + "320": {"name": "林芝市", "parent": "27"}, + "321": {"name": "西安市", "parent": "28"}, + "322": {"name": "铜川市", "parent": "28"}, + "323": {"name": "宝鸡市", "parent": "28"}, + "324": {"name": "咸阳市", 
"parent": "28"}, + "325": {"name": "渭南市", "parent": "28"}, + "326": {"name": "延安市", "parent": "28"}, + "327": {"name": "汉中市", "parent": "28"}, + "328": {"name": "榆林市", "parent": "28"}, + "329": {"name": "安康市", "parent": "28"}, + "330": {"name": "商洛市", "parent": "28"}, + "331": {"name": "兰州市", "parent": "29"}, + "332": {"name": "嘉峪关市", "parent": "29"}, + "333": {"name": "金昌市", "parent": "29"}, + "334": {"name": "白银市", "parent": "29"}, + "335": {"name": "天水市", "parent": "29"}, + "336": {"name": "武威市", "parent": "29"}, + "337": {"name": "张掖市", "parent": "29"}, + "338": {"name": "平凉市", "parent": "29"}, + "339": {"name": "酒泉市", "parent": "29"}, + "340": {"name": "庆阳市", "parent": "29"}, + "341": {"name": "定西市", "parent": "29"}, + "342": {"name": "陇南市", "parent": "29"}, + "343": {"name": "临夏回族自治州", "parent": "29"}, + "344": {"name": "甘南藏族自治州", "parent": "29"}, + "345": {"name": "西宁市", "parent": "30"}, + "346": {"name": "海东市", "parent": "30"}, + "347": {"name": "海北藏族自治州", "parent": "30"}, + "348": {"name": "黄南藏族自治州", "parent": "30"}, + "349": {"name": "海南藏族自治州", "parent": "30"}, + "350": {"name": "果洛藏族自治州", "parent": "30"}, + "351": {"name": "玉树藏族自治州", "parent": "30"}, + "352": {"name": "海西蒙古族藏族自治州", "parent": "30"}, + "353": {"name": "银川市", "parent": "31"}, + "354": {"name": "石嘴山市", "parent": "31"}, + "355": {"name": "吴忠市", "parent": "31"}, + "356": {"name": "固原市", "parent": "31"}, + "357": {"name": "中卫市", "parent": "31"}, + "358": {"name": "乌鲁木齐市", "parent": "32"}, + "359": {"name": "克拉玛依市", "parent": "32"}, + "360": {"name": "吐鲁番市", "parent": "32"}, + "361": {"name": "哈密市", "parent": "32"}, + "362": {"name": "昌吉回族自治州", "parent": "32"}, + "363": {"name": "博尔塔拉蒙古自治州", "parent": "32"}, + "364": {"name": "巴音郭楞蒙古自治州", "parent": "32"}, + "365": {"name": "阿克苏地区", "parent": "32"}, + "366": {"name": "克孜勒苏柯尔克孜自治州", "parent": "32"}, + "367": {"name": "喀什地区", "parent": "32"}, + "368": {"name": "和田地区", "parent": "32"}, + "369": {"name": "伊犁哈萨克自治州", "parent": "32"}, + "370": {"name": 
"塔城地区", "parent": "32"}, + "371": {"name": "阿勒泰地区", "parent": "32"}, + "372": {"name": "新疆省直辖行政单位", "parent": "32"}, + "373": {"name": "可克达拉市", "parent": "32"}, + "374": {"name": "昆玉市", "parent": "32"}, + "375": {"name": "胡杨河市", "parent": "32"}, + "376": {"name": "双河市", "parent": "32"}, + "3560": {"name": "北票市", "parent": "7"}, + "3615": {"name": "高州市", "parent": "20"}, + "3651": {"name": "济源市", "parent": "17"}, + "3662": {"name": "胶南市", "parent": "16"}, + "3683": {"name": "老河口市", "parent": "18"}, + "3758": {"name": "沙河市", "parent": "4"}, + "3822": {"name": "宜城市", "parent": "18"}, + "3842": {"name": "枣阳市", "parent": "18"}, + "3850": {"name": "肇东市", "parent": "9"}, + "3905": {"name": "澳门", "parent": "1"}, + "3906": {"name": "澳门", "parent": "3905"}, + "3907": {"name": "香港", "parent": "1"}, + "3908": {"name": "香港", "parent": "3907"}, + "3947": {"name": "仙桃市", "parent": "18"}, + "3954": {"name": "台湾", "parent": "1"}, + "3955": {"name": "台湾", "parent": "3954"}, + "3956": {"name": "海外", "parent": "1"}, + "3957": {"name": "海外", "parent": "3956"}, + "3958": {"name": "美国", "parent": "3956"}, + "3959": {"name": "加拿大", "parent": "3956"}, + "3961": {"name": "日本", "parent": "3956"}, + "3962": {"name": "韩国", "parent": "3956"}, + "3963": {"name": "德国", "parent": "3956"}, + "3964": {"name": "英国", "parent": "3956"}, + "3965": {"name": "意大利", "parent": "3956"}, + "3966": {"name": "西班牙", "parent": "3956"}, + "3967": {"name": "法国", "parent": "3956"}, + "3968": {"name": "澳大利亚", "parent": "3956"}, + "3969": {"name": "东城区", "parent": "2"}, + "3970": {"name": "西城区", "parent": "2"}, + "3971": {"name": "崇文区", "parent": "2"}, + "3972": {"name": "宣武区", "parent": "2"}, + "3973": {"name": "朝阳区", "parent": "2"}, + "3974": {"name": "海淀区", "parent": "2"}, + "3975": {"name": "丰台区", "parent": "2"}, + "3976": {"name": "石景山区", "parent": "2"}, + "3977": {"name": "门头沟区", "parent": "2"}, + "3978": {"name": "房山区", "parent": "2"}, + "3979": {"name": "通州区", "parent": "2"}, + "3980": {"name": "顺义区", 
"parent": "2"}, + "3981": {"name": "昌平区", "parent": "2"}, + "3982": {"name": "大兴区", "parent": "2"}, + "3983": {"name": "平谷区", "parent": "2"}, + "3984": {"name": "怀柔区", "parent": "2"}, + "3985": {"name": "密云区", "parent": "2"}, + "3986": {"name": "延庆区", "parent": "2"}, + "3987": {"name": "黄浦区", "parent": "10"}, + "3988": {"name": "徐汇区", "parent": "10"}, + "3989": {"name": "长宁区", "parent": "10"}, + "3990": {"name": "静安区", "parent": "10"}, + "3991": {"name": "普陀区", "parent": "10"}, + "3992": {"name": "闸北区", "parent": "10"}, + "3993": {"name": "虹口区", "parent": "10"}, + "3994": {"name": "杨浦区", "parent": "10"}, + "3995": {"name": "宝山区", "parent": "10"}, + "3996": {"name": "闵行区", "parent": "10"}, + "3997": {"name": "嘉定区", "parent": "10"}, + "3998": {"name": "浦东新区", "parent": "10"}, + "3999": {"name": "松江区", "parent": "10"}, + "4000": {"name": "金山区", "parent": "10"}, + "4001": {"name": "青浦区", "parent": "10"}, + "4002": {"name": "奉贤区", "parent": "10"}, + "4003": {"name": "崇明区", "parent": "10"}, + "4004": {"name": "和平区", "parent": "3"}, + "4005": {"name": "河东区", "parent": "3"}, + "4006": {"name": "河西区", "parent": "3"}, + "4007": {"name": "南开区", "parent": "3"}, + "4008": {"name": "红桥区", "parent": "3"}, + "4009": {"name": "河北区", "parent": "3"}, + "4010": {"name": "滨海新区", "parent": "3"}, + "4011": {"name": "东丽区", "parent": "3"}, + "4012": {"name": "西青区", "parent": "3"}, + "4013": {"name": "北辰区", "parent": "3"}, + "4014": {"name": "津南区", "parent": "3"}, + "4015": {"name": "武清区", "parent": "3"}, + "4016": {"name": "宝坻区", "parent": "3"}, + "4017": {"name": "静海区", "parent": "3"}, + "4018": {"name": "宁河区", "parent": "3"}, + "4019": {"name": "蓟州区", "parent": "3"}, + "4020": {"name": "渝中区", "parent": "23"}, + "4021": {"name": "江北区", "parent": "23"}, + "4022": {"name": "南岸区", "parent": "23"}, + "4023": {"name": "沙坪坝区", "parent": "23"}, + "4024": {"name": "九龙坡区", "parent": "23"}, + "4025": {"name": "大渡口区", "parent": "23"}, + "4026": {"name": "渝北区", "parent": "23"}, + "4027": {"name": 
"巴南区", "parent": "23"}, + "4028": {"name": "北碚区", "parent": "23"}, + "4029": {"name": "万州区", "parent": "23"}, + "4030": {"name": "黔江区", "parent": "23"}, + "4031": {"name": "永川区", "parent": "23"}, + "4032": {"name": "涪陵区", "parent": "23"}, + "4033": {"name": "江津区", "parent": "23"}, + "4034": {"name": "合川区", "parent": "23"}, + "4035": {"name": "双桥区", "parent": "23"}, + "4036": {"name": "万盛区", "parent": "23"}, + "4037": {"name": "荣昌区", "parent": "23"}, + "4038": {"name": "大足区", "parent": "23"}, + "4039": {"name": "璧山区", "parent": "23"}, + "4040": {"name": "铜梁区", "parent": "23"}, + "4041": {"name": "潼南区", "parent": "23"}, + "4042": {"name": "綦江区", "parent": "23"}, + "4043": {"name": "忠县", "parent": "23"}, + "4044": {"name": "开州区", "parent": "23"}, + "4045": {"name": "云阳县", "parent": "23"}, + "4046": {"name": "梁平区", "parent": "23"}, + "4047": {"name": "垫江县", "parent": "23"}, + "4048": {"name": "丰都县", "parent": "23"}, + "4049": {"name": "奉节县", "parent": "23"}, + "4050": {"name": "巫山县", "parent": "23"}, + "4051": {"name": "巫溪县", "parent": "23"}, + "4052": {"name": "城口县", "parent": "23"}, + "4053": {"name": "武隆区", "parent": "23"}, + "4054": {"name": "石柱土家族自治县", "parent": "23"}, + "4055": {"name": "秀山土家族苗族自治县", "parent": "23"}, + "4056": {"name": "酉阳土家族苗族自治县", "parent": "23"}, + "4057": {"name": "彭水苗族土家族自治县", "parent": "23"}, + "4058": {"name": "潜江市", "parent": "18"}, + "4059": {"name": "三沙市", "parent": "22"}, + "4060": {"name": "石河子市", "parent": "32"}, + "4061": {"name": "阿拉尔市", "parent": "32"}, + "4062": {"name": "图木舒克市", "parent": "32"}, + "4063": {"name": "五家渠市", "parent": "32"}, + "4064": {"name": "北屯市", "parent": "32"}, + "4065": {"name": "铁门关市", "parent": "32"}, + "4066": {"name": "儋州市", "parent": "22"}, + "4067": {"name": "五指山市", "parent": "22"}, + "4068": {"name": "文昌市", "parent": "22"}, + "4069": {"name": "琼海市", "parent": "22"}, + "4070": {"name": "万宁市", "parent": "22"}, + "4072": {"name": "定安县", "parent": "22"}, + "4073": {"name": "屯昌县", "parent": "22"}, + 
"4074": {"name": "澄迈县", "parent": "22"}, + "4075": {"name": "临高县", "parent": "22"}, + "4076": {"name": "琼中黎族苗族自治县", "parent": "22"}, + "4077": {"name": "保亭黎族苗族自治县", "parent": "22"}, + "4078": {"name": "白沙黎族自治县", "parent": "22"}, + "4079": {"name": "昌江黎族自治县", "parent": "22"}, + "4080": {"name": "乐东黎族自治县", "parent": "22"}, + "4081": {"name": "陵水黎族自治县", "parent": "22"}, + "4082": {"name": "马来西亚", "parent": "3956"}, + "6047": {"name": "长寿区", "parent": "23"}, + "6857": {"name": "阿富汗", "parent": "3956"}, + "6858": {"name": "阿尔巴尼亚", "parent": "3956"}, + "6859": {"name": "阿尔及利亚", "parent": "3956"}, + "6860": {"name": "美属萨摩亚", "parent": "3956"}, + "6861": {"name": "安道尔", "parent": "3956"}, + "6862": {"name": "安哥拉", "parent": "3956"}, + "6863": {"name": "安圭拉", "parent": "3956"}, + "6864": {"name": "南极洲", "parent": "3956"}, + "6865": {"name": "安提瓜和巴布达", "parent": "3956"}, + "6866": {"name": "阿根廷", "parent": "3956"}, + "6867": {"name": "亚美尼亚", "parent": "3956"}, + "6869": {"name": "奥地利", "parent": "3956"}, + "6870": {"name": "阿塞拜疆", "parent": "3956"}, + "6871": {"name": "巴哈马", "parent": "3956"}, + "6872": {"name": "巴林", "parent": "3956"}, + "6873": {"name": "孟加拉国", "parent": "3956"}, + "6874": {"name": "巴巴多斯", "parent": "3956"}, + "6875": {"name": "白俄罗斯", "parent": "3956"}, + "6876": {"name": "比利时", "parent": "3956"}, + "6877": {"name": "伯利兹", "parent": "3956"}, + "6878": {"name": "贝宁", "parent": "3956"}, + "6879": {"name": "百慕大", "parent": "3956"}, + "6880": {"name": "不丹", "parent": "3956"}, + "6881": {"name": "玻利维亚", "parent": "3956"}, + "6882": {"name": "波黑", "parent": "3956"}, + "6883": {"name": "博茨瓦纳", "parent": "3956"}, + "6884": {"name": "布维岛", "parent": "3956"}, + "6885": {"name": "巴西", "parent": "3956"}, + "6886": {"name": "英属印度洋领土", "parent": "3956"}, + "6887": {"name": "文莱", "parent": "3956"}, + "6888": {"name": "保加利亚", "parent": "3956"}, + "6889": {"name": "布基纳法索", "parent": "3956"}, + "6890": {"name": "布隆迪", "parent": "3956"}, + "6891": {"name": "柬埔寨", "parent": 
"3956"}, + "6892": {"name": "喀麦隆", "parent": "3956"}, + "6893": {"name": "佛得角", "parent": "3956"}, + "6894": {"name": "开曼群岛", "parent": "3956"}, + "6895": {"name": "中非", "parent": "3956"}, + "6896": {"name": "乍得", "parent": "3956"}, + "6897": {"name": "智利", "parent": "3956"}, + "6898": {"name": "圣诞岛", "parent": "3956"}, + "6899": {"name": "科科斯(基林)群岛", "parent": "3956"}, + "6900": {"name": "哥伦比亚", "parent": "3956"}, + "6901": {"name": "科摩罗", "parent": "3956"}, + "6902": {"name": "刚果(布)", "parent": "3956"}, + "6903": {"name": "刚果(金)", "parent": "3956"}, + "6904": {"name": "库克群岛", "parent": "3956"}, + "6905": {"name": "哥斯达黎加", "parent": "3956"}, + "6906": {"name": "科特迪瓦", "parent": "3956"}, + "6907": {"name": "克罗地亚", "parent": "3956"}, + "6908": {"name": "古巴", "parent": "3956"}, + "6909": {"name": "塞浦路斯", "parent": "3956"}, + "6910": {"name": "捷克", "parent": "3956"}, + "6911": {"name": "丹麦", "parent": "3956"}, + "6912": {"name": "吉布提", "parent": "3956"}, + "6913": {"name": "多米尼克", "parent": "3956"}, + "6914": {"name": "多米尼加共和国", "parent": "3956"}, + "6915": {"name": "东帝汶", "parent": "3956"}, + "6916": {"name": "厄瓜多尔", "parent": "3956"}, + "6917": {"name": "埃及", "parent": "3956"}, + "6918": {"name": "萨尔瓦多", "parent": "3956"}, + "6919": {"name": "赤道几内亚", "parent": "3956"}, + "6920": {"name": "厄立特里亚", "parent": "3956"}, + "6921": {"name": "爱沙尼亚", "parent": "3956"}, + "6922": {"name": "埃塞俄比亚", "parent": "3956"}, + "6923": {"name": "福克兰群岛(马尔维纳斯)", "parent": "3956"}, + "6924": {"name": "法罗群岛", "parent": "3956"}, + "6925": {"name": "斐济", "parent": "3956"}, + "6926": {"name": "芬兰", "parent": "3956"}, + "6927": {"name": "法属圭亚那", "parent": "3956"}, + "6928": {"name": "法属波利尼西亚", "parent": "3956"}, + "6929": {"name": "法属南部领土", "parent": "3956"}, + "6930": {"name": "加蓬", "parent": "3956"}, + "6931": {"name": "冈比亚", "parent": "3956"}, + "6932": {"name": "格鲁吉亚", "parent": "3956"}, + "6933": {"name": "加纳", "parent": "3956"}, + "6934": {"name": "直布罗陀", "parent": "3956"}, + "6935": 
{"name": "希腊", "parent": "3956"}, + "6936": {"name": "格陵兰", "parent": "3956"}, + "6937": {"name": "格林纳达", "parent": "3956"}, + "6938": {"name": "瓜德罗普", "parent": "3956"}, + "6939": {"name": "关岛", "parent": "3956"}, + "6940": {"name": "危地马拉", "parent": "3956"}, + "6941": {"name": "几内亚", "parent": "3956"}, + "6942": {"name": "几内亚比绍", "parent": "3956"}, + "6943": {"name": "圭亚那", "parent": "3956"}, + "6944": {"name": "海地", "parent": "3956"}, + "6945": {"name": "赫德岛和麦克唐纳岛", "parent": "3956"}, + "6946": {"name": "洪都拉斯", "parent": "3956"}, + "6947": {"name": "匈牙利", "parent": "3956"}, + "6948": {"name": "冰岛", "parent": "3956"}, + "6949": {"name": "印度", "parent": "3956"}, + "6950": {"name": "印度尼西亚", "parent": "3956"}, + "6951": {"name": "伊朗", "parent": "3956"}, + "6952": {"name": "伊拉克", "parent": "3956"}, + "6953": {"name": "爱尔兰", "parent": "3956"}, + "6954": {"name": "以色列", "parent": "3956"}, + "6955": {"name": "牙买加", "parent": "3956"}, + "6956": {"name": "约旦", "parent": "3956"}, + "6957": {"name": "哈萨克斯坦", "parent": "3956"}, + "6958": {"name": "肯尼亚", "parent": "3956"}, + "6959": {"name": "基里巴斯", "parent": "3956"}, + "6960": {"name": "朝鲜", "parent": "3956"}, + "6961": {"name": "科威特", "parent": "3956"}, + "6962": {"name": "吉尔吉斯斯坦", "parent": "3956"}, + "6963": {"name": "老挝", "parent": "3956"}, + "6964": {"name": "拉脱维亚", "parent": "3956"}, + "6965": {"name": "黎巴嫩", "parent": "3956"}, + "6966": {"name": "莱索托", "parent": "3956"}, + "6967": {"name": "利比里亚", "parent": "3956"}, + "6968": {"name": "利比亚", "parent": "3956"}, + "6969": {"name": "列支敦士登", "parent": "3956"}, + "6970": {"name": "立陶宛", "parent": "3956"}, + "6971": {"name": "卢森堡", "parent": "3956"}, + "6972": {"name": "前南马其顿", "parent": "3956"}, + "6973": {"name": "马达加斯加", "parent": "3956"}, + "6974": {"name": "马拉维", "parent": "3956"}, + "6975": {"name": "马尔代夫", "parent": "3956"}, + "6976": {"name": "马里", "parent": "3956"}, + "6977": {"name": "马耳他", "parent": "3956"}, + "6978": {"name": "马绍尔群岛", "parent": "3956"}, + 
"6979": {"name": "马提尼克", "parent": "3956"}, + "6980": {"name": "毛里塔尼亚", "parent": "3956"}, + "6981": {"name": "毛里求斯", "parent": "3956"}, + "6982": {"name": "马约特", "parent": "3956"}, + "6983": {"name": "墨西哥", "parent": "3956"}, + "6984": {"name": "密克罗尼西亚联邦", "parent": "3956"}, + "6985": {"name": "摩尔多瓦", "parent": "3956"}, + "6986": {"name": "摩纳哥", "parent": "3956"}, + "6987": {"name": "蒙古", "parent": "3956"}, + "6988": {"name": "蒙特塞拉特", "parent": "3956"}, + "6989": {"name": "摩洛哥", "parent": "3956"}, + "6990": {"name": "莫桑比克", "parent": "3956"}, + "6991": {"name": "缅甸", "parent": "3956"}, + "6992": {"name": "纳米比亚", "parent": "3956"}, + "6993": {"name": "瑙鲁", "parent": "3956"}, + "6994": {"name": "尼泊尔", "parent": "3956"}, + "6995": {"name": "荷兰", "parent": "3956"}, + "6996": {"name": "荷属安的列斯", "parent": "3956"}, + "6997": {"name": "新喀里多尼亚", "parent": "3956"}, + "6998": {"name": "新西兰", "parent": "3956"}, + "6999": {"name": "尼加拉瓜", "parent": "3956"}, + "7000": {"name": "尼日尔", "parent": "3956"}, + "7001": {"name": "尼日利亚", "parent": "3956"}, + "7002": {"name": "纽埃", "parent": "3956"}, + "7003": {"name": "诺福克岛", "parent": "3956"}, + "7004": {"name": "北马里亚纳", "parent": "3956"}, + "7005": {"name": "挪威", "parent": "3956"}, + "7006": {"name": "阿曼", "parent": "3956"}, + "7007": {"name": "巴基斯坦", "parent": "3956"}, + "7008": {"name": "帕劳", "parent": "3956"}, + "7009": {"name": "巴勒斯坦", "parent": "3956"}, + "7010": {"name": "巴拿马", "parent": "3956"}, + "7011": {"name": "巴布亚新几内亚", "parent": "3956"}, + "7012": {"name": "巴拉圭", "parent": "3956"}, + "7013": {"name": "秘鲁", "parent": "3956"}, + "7014": {"name": "菲律宾", "parent": "3956"}, + "7015": {"name": "皮特凯恩群岛", "parent": "3956"}, + "7016": {"name": "波兰", "parent": "3956"}, + "7017": {"name": "葡萄牙", "parent": "3956"}, + "7018": {"name": "波多黎各", "parent": "3956"}, + "7019": {"name": "卡塔尔", "parent": "3956"}, + "7020": {"name": "留尼汪", "parent": "3956"}, + "7021": {"name": "罗马尼亚", "parent": "3956"}, + "7022": {"name": "俄罗斯联邦", "parent": 
"3956"}, + "7023": {"name": "卢旺达", "parent": "3956"}, + "7024": {"name": "圣赫勒拿", "parent": "3956"}, + "7025": {"name": "圣基茨和尼维斯", "parent": "3956"}, + "7026": {"name": "圣卢西亚", "parent": "3956"}, + "7027": {"name": "圣皮埃尔和密克隆", "parent": "3956"}, + "7028": {"name": "圣文森特和格林纳丁斯", "parent": "3956"}, + "7029": {"name": "萨摩亚", "parent": "3956"}, + "7030": {"name": "圣马力诺", "parent": "3956"}, + "7031": {"name": "圣多美和普林西比", "parent": "3956"}, + "7032": {"name": "沙特阿拉伯", "parent": "3956"}, + "7033": {"name": "塞内加尔", "parent": "3956"}, + "7034": {"name": "塞舌尔", "parent": "3956"}, + "7035": {"name": "塞拉利昂", "parent": "3956"}, + "7036": {"name": "新加坡", "parent": "3956"}, + "7037": {"name": "斯洛伐克", "parent": "3956"}, + "7038": {"name": "斯洛文尼亚", "parent": "3956"}, + "7039": {"name": "所罗门群岛", "parent": "3956"}, + "7040": {"name": "索马里", "parent": "3956"}, + "7041": {"name": "南非", "parent": "3956"}, + "7042": {"name": "南乔治亚岛和南桑德韦奇岛", "parent": "3956"}, + "7043": {"name": "斯里兰卡", "parent": "3956"}, + "7044": {"name": "苏丹", "parent": "3956"}, + "7045": {"name": "苏里南", "parent": "3956"}, + "7046": {"name": "斯瓦尔巴群岛", "parent": "3956"}, + "7047": {"name": "斯威士兰", "parent": "3956"}, + "7048": {"name": "瑞典", "parent": "3956"}, + "7049": {"name": "瑞士", "parent": "3956"}, + "7050": {"name": "叙利亚", "parent": "3956"}, + "7051": {"name": "塔吉克斯坦", "parent": "3956"}, + "7052": {"name": "坦桑尼亚", "parent": "3956"}, + "7053": {"name": "泰国", "parent": "3956"}, + "7054": {"name": "多哥", "parent": "3956"}, + "7055": {"name": "托克劳", "parent": "3956"}, + "7056": {"name": "汤加", "parent": "3956"}, + "7057": {"name": "特立尼达和多巴哥", "parent": "3956"}, + "7058": {"name": "突尼斯", "parent": "3956"}, + "7059": {"name": "土耳其", "parent": "3956"}, + "7060": {"name": "土库曼斯坦", "parent": "3956"}, + "7061": {"name": "特克斯科斯群岛", "parent": "3956"}, + "7062": {"name": "图瓦卢", "parent": "3956"}, + "7063": {"name": "乌干达", "parent": "3956"}, + "7064": {"name": "乌克兰", "parent": "3956"}, + "7065": {"name": "阿联酋", "parent": "3956"}, + 
"7066": {"name": "美国本土外小岛屿", "parent": "3956"}, + "7067": {"name": "乌拉圭", "parent": "3956"}, + "7068": {"name": "乌兹别克斯坦", "parent": "3956"}, + "7069": {"name": "瓦努阿图", "parent": "3956"}, + "7070": {"name": "梵蒂冈", "parent": "3956"}, + "7071": {"name": "委内瑞拉", "parent": "3956"}, + "7072": {"name": "越南", "parent": "3956"}, + "7073": {"name": "英属维尔京群岛", "parent": "3956"}, + "7074": {"name": "美属维尔京群岛", "parent": "3956"}, + "7075": {"name": "瓦利斯和富图纳", "parent": "3956"}, + "7076": {"name": "西撒哈拉", "parent": "3956"}, + "7077": {"name": "也门", "parent": "3956"}, + "7078": {"name": "南斯拉夫", "parent": "3956"}, + "7079": {"name": "赞比亚", "parent": "3956"}, + "7080": {"name": "津巴布韦", "parent": "3956"}, + "7081": {"name": "塞尔维亚", "parent": "3956"}, + "7082": {"name": "雄安新区", "parent": "4"}, + "7084": {"name": "天门市", "parent": "18"}, } -NM_SET = set([v["name"] for _,v in TBL.items()]) +NM_SET = set([v["name"] for _, v in TBL.items()]) + def get_names(id): - if not id or str(id).lower() == "none":return [] + if not id or str(id).lower() == "none": + return [] id = str(id) - if not re.match("[0-9]+$", id.strip()):return [id] + if not re.match("[0-9]+$", id.strip()): + return [id] nms = [] d = TBL.get(id) - if not d:return[] + if not d: + return [] nms.append(d["name"]) p = get_names(d["parent"]) - if p: nms.extend(p) + if p: + nms.extend(p) return nms -import re + + def isName(nm): - if nm in NM_SET:return True - if nm + "市" in NM_SET:return True - if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET:return True + if nm in NM_SET: + return True + if nm + "市" in NM_SET: + return True + if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET: + return True return False diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py index 31662cde94f9ba161e97ad3ba3703b526bf6b706..c6b0bf7fdbccc49aaa694c6134be20afe20e0c5e 100644 --- a/deepdoc/parser/resume/entities/schools.py +++ b/deepdoc/parser/resume/entities/schools.py @@ -16,8 +16,11 @@ import json 
import re import copy import pandas as pd + current_file_path = os.path.dirname(os.path.abspath(__file__)) -TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("") +TBL = pd.read_csv( + os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0 +).fillna("") TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip()) GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r")) GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH]) @@ -26,14 +29,15 @@ GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH]) def loadRank(fnm): global TBL TBL["rank"] = 1000000 - with open(fnm, "r", encoding='utf-8') as f: + with open(fnm, "r", encoding="utf-8") as f: while True: - l = f.readline() - if not l:break - l = l.strip("\n").split(",") + line = f.readline() + if not line: + break + line = line.strip("\n").split(",") try: - nm,rk = l[0].strip(),int(l[1]) - #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>" + nm, rk = line[0].strip(), int(line[1]) + # assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>" TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk except Exception: pass @@ -44,27 +48,35 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv")) def split(txt): tks = [] - for t in re.sub(r"[ \t]+", " ",txt).split(): - if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \ - re.match(r"[a-zA-Z]", t) and tks: + for t in re.sub(r"[ \t]+", " ", txt).split(): + if ( + tks + and re.match(r".*[a-zA-Z]$", tks[-1]) + and re.match(r"[a-zA-Z]", t) + and tks + ): tks[-1] = tks[-1] + " " + t - else:tks.append(t) + else: + tks.append(t) return tks def select(nm): global TBL - if not nm:return - if isinstance(nm, list):nm = str(nm[0]) + if not nm: + return + if isinstance(nm, list): + nm = str(nm[0]) nm = split(nm)[0] nm = str(nm).lower().strip() nm = re.sub(r"[((][^()()]+[))]", "", nm.lower()) nm = re.sub(r"(^the 
|[,.&()();;·]+|^(英国|美国|瑞士))", "", nm) nm = re.sub(r"大学.*学院", "大学", nm) tbl = copy.deepcopy(TBL) - tbl["hit_alias"] = tbl["alias"].map(lambda x:nm in set(x.split("+"))) - res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | (tbl.hit_alias == True))] - if res.empty:return + tbl["hit_alias"] = tbl["alias"].map(lambda x: nm in set(x.split("+"))) + res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | tbl.hit_alias)] + if res.empty: + return return json.loads(res.to_json(orient="records"))[0] @@ -74,4 +86,3 @@ def is_good(nm): nm = re.sub(r"[((][^()()]+[))]", "", nm.lower()) nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm) return nm in GOOD_SCH - diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py index 7d4297774231fcaf620b7b2074b8574faa374969..9e4376735c3b1ea8bd2d9f2cad12cee01ef53366 100644 --- a/deepdoc/parser/resume/step_two.py +++ b/deepdoc/parser/resume/step_two.py @@ -25,7 +25,8 @@ from xpinyin import Pinyin from contextlib import contextmanager -class TimeoutException(Exception): pass +class TimeoutException(Exception): + pass @contextmanager @@ -50,8 +51,10 @@ def rmHtmlTag(line): def highest_degree(dg): - if not dg: return "" - if type(dg) == type(""): dg = [dg] + if not dg: + return "" + if isinstance(dg, str): + dg = [dg] m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8} return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0] @@ -68,10 +71,12 @@ def forEdu(cv): for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))): e = {} if n.get("end_time"): - if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"] + if n["end_time"] > edu_end_dt: + edu_end_dt = n["end_time"] try: dt = n["end_time"] - if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) + if re.match(r"[0-9]{9,}", dt): + dt = turnTm2Dt(dt) y, m, d = getYMD(dt) ed_dt.append(str(y)) e["end_dt_kwd"] = str(y) @@ -80,7 +85,8 @@ def forEdu(cv): if n.get("start_time"): try: dt = 
n["start_time"] - if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) + if re.match(r"[0-9]{9,}", dt): + dt = turnTm2Dt(dt) y, m, d = getYMD(dt) st_dt.append(str(y)) e["start_dt_kwd"] = str(y) @@ -89,13 +95,20 @@ def forEdu(cv): r = schools.select(n.get("school_name", "")) if r: - if str(r.get("type", "")) == "1": fea.append("211") - if str(r.get("type", "")) == "2": fea.append("211") - if str(r.get("is_abroad", "")) == "1": fea.append("留学") - if str(r.get("is_double_first", "")) == "1": fea.append("双一流") - if str(r.get("is_985", "")) == "1": fea.append("985") - if str(r.get("is_world_known", "")) == "1": fea.append("海外知名") - if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"] + if str(r.get("type", "")) == "1": + fea.append("211") + if str(r.get("type", "")) == "2": + fea.append("211") + if str(r.get("is_abroad", "")) == "1": + fea.append("留学") + if str(r.get("is_double_first", "")) == "1": + fea.append("双一流") + if str(r.get("is_985", "")) == "1": + fea.append("985") + if str(r.get("is_world_known", "")) == "1": + fea.append("海外知名") + if r.get("rank") and cv["school_rank_int"] > r["rank"]: + cv["school_rank_int"] = r["rank"] if n.get("school_name") and isinstance(n["school_name"], str): sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"])) @@ -106,22 +119,25 @@ def forEdu(cv): maj.append(n["discipline_name"]) e["major_kwd"] = n["discipline_name"] - if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1" + if not n.get("degree") and "985" in fea and not first_fea: + n["degree"] = "1" if n.get("degree"): d = degrees.get_name(n["degree"]) - if d: e["degree_kwd"] = d - if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", - n.get( - "school_name", - ""))): d = "专升本" - if d: deg.append(d) + if d: + e["degree_kwd"] = d + if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", 
n.get("school_name",""))): + d = "专升本" + if d: + deg.append(d) # for first degree if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]: fdeg = [d] - if n.get("school_name"): fsch = [n["school_name"]] - if n.get("discipline_name"): fmaj = [n["discipline_name"]] + if n.get("school_name"): + fsch = [n["school_name"]] + if n.get("discipline_name"): + fmaj = [n["discipline_name"]] first_fea = copy.deepcopy(fea) edu_nst.append(e) @@ -140,16 +156,26 @@ def forEdu(cv): else: cv["sch_rank_kwd"].append("一般学校") - if edu_nst: cv["edu_nst"] = edu_nst - if fea: cv["edu_fea_kwd"] = list(set(fea)) - if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea)) - if maj: cv["major_kwd"] = maj - if fsch: cv["first_school_name_kwd"] = fsch - if fdeg: cv["first_degree_kwd"] = fdeg - if fmaj: cv["first_major_kwd"] = fmaj - if st_dt: cv["edu_start_kwd"] = st_dt - if ed_dt: cv["edu_end_kwd"] = ed_dt - if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt]) + if edu_nst: + cv["edu_nst"] = edu_nst + if fea: + cv["edu_fea_kwd"] = list(set(fea)) + if first_fea: + cv["edu_first_fea_kwd"] = list(set(first_fea)) + if maj: + cv["major_kwd"] = maj + if fsch: + cv["first_school_name_kwd"] = fsch + if fdeg: + cv["first_degree_kwd"] = fdeg + if fmaj: + cv["first_major_kwd"] = fmaj + if st_dt: + cv["edu_start_kwd"] = st_dt + if ed_dt: + cv["edu_end_kwd"] = ed_dt + if ed_dt: + cv["edu_end_int"] = max([int(t) for t in ed_dt]) if deg: if "本科" in deg and "专科" in deg: deg.append("专升本") @@ -158,8 +184,10 @@ def forEdu(cv): cv["highest_degree_kwd"] = highest_degree(deg) if edu_end_dt: try: - if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt) - if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today())) + if re.match(r"[0-9]{9,}", edu_end_dt): + edu_end_dt = turnTm2Dt(edu_end_dt) + if edu_end_dt.strip("\n") == "至今": + edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today())) y, m, d = getYMD(edu_end_dt) cv["work_exp_flt"] = 
min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) except Exception as e: @@ -171,7 +199,8 @@ def forEdu(cv): or not cv.get("degree_kwd"): for c in sch: if schools.is_good(c): - if "tag_kwd" not in cv: cv["tag_kwd"] = [] + if "tag_kwd" not in cv: + cv["tag_kwd"] = [] cv["tag_kwd"].append("好学校") cv["tag_kwd"].append("好学历") break @@ -180,28 +209,39 @@ def forEdu(cv): any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \ or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \ or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]): - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历") - - if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj)) - if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch)) - if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch)) - if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj)) + if "tag_kwd" not in cv: + cv["tag_kwd"] = [] + if "好学历" not in cv["tag_kwd"]: + cv["tag_kwd"].append("好学历") + + if cv.get("major_kwd"): + cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj)) + if cv.get("school_name_kwd"): + cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch)) + if cv.get("first_school_name_kwd"): + cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch)) + if cv.get("first_major_kwd"): + cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj)) return cv def forProj(cv): - if not cv.get("project_obj"): return cv + if not cv.get("project_obj"): + return cv pro_nms, desc = [], [] for i, n in enumerate( - sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "", + sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if isinstance(x, dict) 
else "", reverse=True)): - if n.get("name"): pro_nms.append(n["name"]) - if n.get("describe"): desc.append(str(n["describe"])) - if n.get("responsibilities"): desc.append(str(n["responsibilities"])) - if n.get("achivement"): desc.append(str(n["achivement"])) + if n.get("name"): + pro_nms.append(n["name"]) + if n.get("describe"): + desc.append(str(n["describe"])) + if n.get("responsibilities"): + desc.append(str(n["responsibilities"])) + if n.get("achivement"): + desc.append(str(n["achivement"])) if pro_nms: # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms)) @@ -233,15 +273,16 @@ def forWork(cv): work_st_tm = "" corp_tags = [] for i, n in enumerate( - sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "", + sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if isinstance(x, dict) else "", reverse=True)): - if type(n) == type(""): + if isinstance(n, str): try: n = json_loads(n) except Exception: continue - if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"] + if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): + work_st_tm = n["start_time"] for c in flds: if not n.get(c) or str(n[c]) == '0': fea[c].append("") @@ -262,14 +303,18 @@ def forWork(cv): fea[c].append(rmHtmlTag(str(n[c]).lower())) y, m, d = getYMD(n.get("start_time")) - if not y or not m: continue + if not y or not m: + continue st = "%s-%02d-%02d" % (y, int(m), int(d)) latest_job_tm = st y, m, d = getYMD(n.get("end_time")) - if (not y or not m) and i > 0: continue - if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", ""))) - if not y or not m: continue + if (not y or not m) and i > 0: + continue + if not y or not m or int(y) > 2022: + y, m, d = getYMD(str(n.get("updated_at", ""))) + if not y or not m: + continue ed = "%s-%02d-%02d" % (y, int(m), int(d)) try: @@ -279,22 +324,28 @@ def forWork(cv): if n.get("scale"): r = 
re.search(r"^([0-9]+)", str(n["scale"])) - if r: scales.append(int(r.group(1))) + if r: + scales.append(int(r.group(1))) if goodcorp: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] + if "tag_kwd" not in cv: + cv["tag_kwd"] = [] cv["tag_kwd"].append("好公司") if goodcorp_: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] + if "tag_kwd" not in cv: + cv["tag_kwd"] = [] cv["tag_kwd"].append("好公司(曾)") if corp_tags: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] + if "tag_kwd" not in cv: + cv["tag_kwd"] = [] cv["tag_kwd"].extend(corp_tags) cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)] - if latest_job_tm: cv["latest_job_dt"] = latest_job_tm - if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"] + if latest_job_tm: + cv["latest_job_dt"] = latest_job_tm + if fea["corporation_id"]: + cv["corporation_id"] = fea["corporation_id"] if fea["position_name"]: cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0]) @@ -317,18 +368,23 @@ def forWork(cv): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0]) cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:])) - if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if + if fea["subordinates_count"]: + fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if re.match(r"[^0-9]+$", str(i))] - if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"]) + if fea["subordinates_count"]: + cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"]) - if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])] - if not cv.get("corporation_id"): cv["corporation_id"] = [] + if isinstance(cv.get("corporation_id"), int): + cv["corporation_id"] = [str(cv["corporation_id"])] + if not cv.get("corporation_id"): + cv["corporation_id"] = [] for i in cv.get("corporation_id", []): cv["baike_flt"] = max(corporations.baike(i), 
cv["baike_flt"] if "baike_flt" in cv else 0) if work_st_tm: try: - if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm) + if re.match(r"[0-9]{9,}", work_st_tm): + work_st_tm = turnTm2Dt(work_st_tm) y, m, d = getYMD(work_st_tm) cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) except Exception as e: @@ -339,28 +395,37 @@ def forWork(cv): cv["dua_flt"] = np.mean(duas) cv["cur_dua_int"] = duas[0] cv["job_num_int"] = len(duas) - if scales: cv["scale_flt"] = np.max(scales) + if scales: + cv["scale_flt"] = np.max(scales) return cv def turnTm2Dt(b): - if not b: return + if not b: + return b = str(b).strip() - if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10]))) + if re.match(r"[0-9]{10,}", b): + b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10]))) return b def getYMD(b): y, m, d = "", "", "01" - if not b: return (y, m, d) + if not b: + return (y, m, d) b = turnTm2Dt(b) - if re.match(r"[0-9]{4}", b): y = int(b[:4]) + if re.match(r"[0-9]{4}", b): + y = int(b[:4]) r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b) - if r: m = r.group(1) + if r: + m = r.group(1) r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b) - if r: d = r.group(1) - if not d or int(d) == 0 or int(d) > 31: d = "1" - if not m or int(m) > 12 or int(m) < 1: m = "1" + if r: + d = r.group(1) + if not d or int(d) == 0 or int(d) > 31: + d = "1" + if not m or int(m) > 12 or int(m) < 1: + m = "1" return (y, m, d) @@ -369,7 +434,8 @@ def birth(cv): cv["integerity_flt"] *= 0.9 return cv y, m, d = getYMD(cv["birth"]) - if not m or not y: return cv + if not m or not y: + return cv b = "%s-%02d-%02d" % (y, int(m), int(d)) cv["birth_dt"] = b cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d)) @@ -380,7 +446,8 @@ def birth(cv): def parse(cv): for k in cv.keys(): - if cv[k] == '\\N': cv[k] = '' + if cv[k] == '\\N': + cv[k] = '' # cv = cv.asDict() tks_fld = ["address", "corporation_name", 
"discipline_name", "email", "expect_city_names", "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name", @@ -402,9 +469,12 @@ def parse(cv): rmkeys = [] for k in cv.keys(): - if cv[k] is None: rmkeys.append(k) - if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k) - for k in rmkeys: del cv[k] + if cv[k] is None: + rmkeys.append(k) + if (isinstance(cv[k], list) or isinstance(cv[k], str)) and len(cv[k]) == 0: + rmkeys.append(k) + for k in rmkeys: + del cv[k] integerity = 0. flds_num = 0. @@ -414,7 +484,8 @@ def parse(cv): flds_num += len(flds) for f in flds: v = str(cv.get(f, "")) - if len(v) > 0 and v != '0' and v != '[]': integerity += 1 + if len(v) > 0 and v != '0' and v != '[]': + integerity += 1 hasValues(tks_fld) hasValues(small_tks_fld) @@ -433,7 +504,8 @@ def parse(cv): (r"[ ()\(\)人/·0-9-]+", ""), (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]: cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE) - if len(cv["corporation_type"]) < 2: del cv["corporation_type"] + if len(cv["corporation_type"]) < 2: + del cv["corporation_type"] if cv.get("political_status"): for p, r in [ @@ -441,9 +513,11 @@ def parse(cv): (r".*(无党派|公民).*", "群众"), (r".*团员.*", "团员")]: cv["political_status"] = re.sub(p, r, cv["political_status"]) - if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"] + if not re.search(r"[党团群]", cv["political_status"]): + del cv["political_status"] - if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"])) + if cv.get("phone"): + cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"])) keys = list(cv.keys()) for k in keys: @@ -454,9 +528,11 @@ def parse(cv): cv[k] = [a for _, a in cv[k].items()] nms = [] for n in cv[k]: - if type(n) != type({}) or "name" not in n or not n.get("name"): continue + if not isinstance(n, dict) or "name" not in n 
or not n.get("name"): + continue n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower() - if not n["name"]: continue + if not n["name"]: + continue nms.append(n["name"]) if nms: t = k[:-4] @@ -469,15 +545,18 @@ def parse(cv): # tokenize fields if k in tks_fld: cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k]) - if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"]) + if k in small_tks_fld: + cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"]) # keyword fields - if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower() + if k in kwd_fld: + cv[f"{k}_kwd"] = [n.lower() for n in re.split(r"[\t,,;;. ]", re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k]) ) if n] - if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k] + if k in num_fld and cv.get(k): + cv[f"{k}_int"] = cv[k] cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "") # for name field @@ -501,10 +580,12 @@ def parse(cv): cv["name_py_pref0_tks"] = "" cv["name_py_pref_tks"] = "" for py in PY.get_pinyins(nm[:20], ''): - for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i] + for i in range(2, len(py) + 1): + cv["name_py_pref_tks"] += " " + py[:i] for py in PY.get_pinyins(nm[:20], ' '): py = py.split() - for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i]) + for i in range(1, len(py) + 1): + cv["name_py_pref0_tks"] += " " + "".join(py[:i]) cv["name_kwd"] = name cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3] @@ -526,22 +607,30 @@ def parse(cv): cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S') else: y, m, d = getYMD(str(cv.get("updated_at", ""))) - if not y: y = "2012" - if not m: m = "01" - if not d: d = "01" + if not y: + y = "2012" + if not m: + m = "01" + if not d: + d = "01" cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d)) # long text tokenize - if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"])) + if 
cv.get("responsibilities"): + cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"])) # for yes or no field fea = [] for f, y, n in is_fld: - if f not in cv: continue - if cv[f] == '是': fea.append(y) - if cv[f] == '否': fea.append(n) + if f not in cv: + continue + if cv[f] == '是': + fea.append(y) + if cv[f] == '否': + fea.append(n) - if fea: cv["tag_kwd"] = fea + if fea: + cv["tag_kwd"] = fea cv = forEdu(cv) cv = forProj(cv) @@ -550,9 +639,11 @@ def parse(cv): cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])] for i in range(len(cv["corp_proj_sch_deg_kwd"])): - for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j + for j in cv.get("sch_rank_kwd", []): + cv["corp_proj_sch_deg_kwd"][i] += "+" + j for i in range(len(cv["corp_proj_sch_deg_kwd"])): - if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"] + if cv.get("highest_degree_kwd"): + cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"] try: if not cv.get("work_exp_flt") and cv.get("work_start_time"): @@ -565,17 +656,21 @@ def parse(cv): cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y) except Exception as e: logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time"))) - if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12. + if "work_exp_flt" not in cv and cv.get("work_experience", 0): + cv["work_exp_flt"] = int(cv["work_experience"]) / 12. 
keys = list(cv.keys()) for k in keys: - if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k] + if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): + del cv[k] for k in cv.keys(): - if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue + if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list): + continue cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']])) keys = [k for k in cv.keys() if re.search(r"_feas*$", k)] for k in keys: - if cv[k] <= 0: del cv[k] + if cv[k] <= 0: + del cv[k] cv["tob_resume_id"] = str(cv["tob_resume_id"]) cv["id"] = cv["tob_resume_id"] @@ -592,5 +687,6 @@ def dealWithInt64(d): if isinstance(d, list): d = [dealWithInt64(t) for t in d] - if isinstance(d, np.integer): d = int(d) + if isinstance(d, np.integer): + d = int(d) return d diff --git a/deepdoc/parser/txt_parser.py b/deepdoc/parser/txt_parser.py index 620368987ace8b0bf7fec456cebb4136675d3284..93b52eea32d4434f5e8fb949af4f88a1b7fed636 100644 --- a/deepdoc/parser/txt_parser.py +++ b/deepdoc/parser/txt_parser.py @@ -51,6 +51,7 @@ class RAGFlowTxtParser: dels = [d for d in dels if d] dels = "|".join(dels) secs = re.split(r"(%s)" % dels, txt) - for sec in secs: add_chunk(sec) + for sec in secs: + add_chunk(sec) return [[c, ""] for c in cks] diff --git a/deepdoc/vision/__init__.py b/deepdoc/vision/__init__.py index 9f16fe3d8c758e30044c1e9b5356e75afce3a10c..131827b57ad9fdb7039679653d026b0fefaaef25 100644 --- a/deepdoc/vision/__init__.py +++ b/deepdoc/vision/__init__.py @@ -18,7 +18,6 @@ from .recognizer import Recognizer from .layout_recognizer import LayoutRecognizer from .table_structure_recognizer import TableStructureRecognizer - def init_in_out(args): from PIL import Image import os @@ -47,7 +46,7 @@ def init_in_out(args): try: images.append(Image.open(fnm)) outputs.append(os.path.split(fnm)[-1]) - except Exception as e: + except Exception: traceback.print_exc() if os.path.isdir(args.inputs): @@ 
-56,6 +55,16 @@ def init_in_out(args): else: images_and_outputs(args.inputs) - for i in range(len(outputs)): outputs[i] = os.path.join(args.output_dir, outputs[i]) + for i in range(len(outputs)): + outputs[i] = os.path.join(args.output_dir, outputs[i]) + + return images, outputs + - return images, outputs \ No newline at end of file +__all__ = [ + "OCR", + "Recognizer", + "LayoutRecognizer", + "TableStructureRecognizer", + "init_in_out", +] diff --git a/deepdoc/vision/layout_recognizer.py b/deepdoc/vision/layout_recognizer.py index 88006f9af60a88ff7aba0abded64c1e701bc248e..e8a6fdc5cfd83bdb3165e35f2232f42709b491d5 100644 --- a/deepdoc/vision/layout_recognizer.py +++ b/deepdoc/vision/layout_recognizer.py @@ -42,7 +42,7 @@ class LayoutRecognizer(Recognizer): get_project_base_directory(), "rag/res/deepdoc") super().__init__(self.labels, domain, model_dir) - except Exception as e: + except Exception: model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), local_dir_use_symlinks=False) @@ -77,7 +77,7 @@ class LayoutRecognizer(Recognizer): "page_number": pn, } for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts] lts = self.sort_Y_firstly(lts, np.mean( - [l["bottom"] - l["top"] for l in lts]) / 2) + [lt["bottom"] - lt["top"] for lt in lts]) / 2) lts = self.layouts_cleanup(bxs, lts) page_layout.append(lts) diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py index 317b671fdd4b6e70a1f6f318dad16cddfd97672a..ee8ca6ab1eafeace731f587370505be19d5b22dd 100644 --- a/deepdoc/vision/ocr.py +++ b/deepdoc/vision/ocr.py @@ -19,7 +19,9 @@ from huggingface_hub import snapshot_download from api.utils.file_utils import get_project_base_directory from .operators import * +import math import numpy as np +import cv2 import onnxruntime as ort from .postprocess import build_post_process @@ -484,7 +486,7 @@ class OCR(object): "rag/res/deepdoc") self.text_detector = 
TextDetector(model_dir) self.text_recognizer = TextRecognizer(model_dir) - except Exception as e: + except Exception: model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), local_dir_use_symlinks=False) diff --git a/deepdoc/vision/operators.py b/deepdoc/vision/operators.py index 9037fc455238cb5b1d4c90c56b37b6cc202efad7..80ae299486569dc239e773cb292414042eb45c96 100644 --- a/deepdoc/vision/operators.py +++ b/deepdoc/vision/operators.py @@ -232,7 +232,7 @@ class LinearResize(object): """ assert len(self.target_size) == 2 assert self.target_size[0] > 0 and self.target_size[1] > 0 - im_channel = im.shape[2] + _im_channel = im.shape[2] im_scale_y, im_scale_x = self.generate_scale(im) im = cv2.resize( im, @@ -255,7 +255,7 @@ class LinearResize(object): im_scale_y: the resize ratio of Y """ origin_shape = im.shape[:2] - im_c = im.shape[2] + _im_c = im.shape[2] if self.keep_ratio: im_size_min = np.min(origin_shape) im_size_max = np.max(origin_shape) @@ -581,7 +581,7 @@ class SRResize(object): return data images_HR = data["image_hr"] - label_strs = data["label"] + _label_strs = data["label"] transform = ResizeNormalize((imgW, imgH)) images_HR = transform(images_HR) data["img_hr"] = images_HR diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py index 9ab08e4ffa699c7403ce174b12bf81727a09a96f..6fb111de3bb843231aefae7edaa0bbeae9b44364 100644 --- a/deepdoc/vision/postprocess.py +++ b/deepdoc/vision/postprocess.py @@ -121,7 +121,7 @@ class DBPostProcess(object): outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] + _img, contours, _ = outs[0], outs[1], outs[2] elif len(outs) == 2: contours, _ = outs[0], outs[1] diff --git a/deepdoc/vision/recognizer.py b/deepdoc/vision/recognizer.py index 2181550e548fe1275f3a3570a86365216889a9c0..de5dc7ed06e2ab57de918cbab82fec3a55ff72ee 
100644 --- a/deepdoc/vision/recognizer.py +++ b/deepdoc/vision/recognizer.py @@ -13,15 +13,18 @@ import logging import os +import math +import numpy as np +import cv2 from copy import deepcopy + import onnxruntime as ort from huggingface_hub import snapshot_download from api.utils.file_utils import get_project_base_directory from .operators import * - class Recognizer(object): def __init__(self, label_list, task_name, model_dir=None): """ @@ -277,7 +280,8 @@ class Recognizer(object): return min_dis, min_i = 1000000, None for i,b in enumerate(boxes): - if box.get("layoutno", "0") != b.get("layoutno", "0"): continue + if box.get("layoutno", "0") != b.get("layoutno", "0"): + continue dis = min(abs(box["x0"] - b["x0"]), abs(box["x1"] - b["x1"]), abs(box["x0"]+box["x1"] - b["x1"] - b["x0"])/2) if dis < min_dis: min_i = i @@ -402,7 +406,8 @@ class Recognizer(object): scores = np.max(boxes[:, 4:], axis=1) boxes = boxes[scores > thr, :] scores = scores[scores > thr] - if len(boxes) == 0: return [] + if len(boxes) == 0: + return [] # Get the class with the highest confidence class_ids = np.argmax(boxes[:, 4:], axis=1) @@ -432,7 +437,8 @@ class Recognizer(object): for i in range(len(image_list)): if not isinstance(image_list[i], np.ndarray): imgs.append(np.array(image_list[i])) - else: imgs.append(image_list[i]) + else: + imgs.append(image_list[i]) batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size) for i in range(batch_loop_cnt): diff --git a/graphrag/community_reports_extractor.py b/graphrag/community_reports_extractor.py index 25f7b170b78f870ad7efdd778b5a823062032d71..756a7811eb985e8e20ca78d8de49ae2c9f249cc6 100644 --- a/graphrag/community_reports_extractor.py +++ b/graphrag/community_reports_extractor.py @@ -88,7 +88,8 @@ class CommunityReportsExtractor: ("findings", list), ("rating", float), ("rating_explanation", str), - ]): continue + ]): + continue response["weight"] = weight response["entities"] = ents except Exception as e: @@ -100,7 +101,8 @@ class 
CommunityReportsExtractor: res_str.append(self._get_text_output(response)) res_dict.append(response) over += 1 - if callback: callback(msg=f"Communities: {over}/{total}, elapsed: {timer() - st}s, used tokens: {token_count}") + if callback: + callback(msg=f"Communities: {over}/{total}, elapsed: {timer() - st}s, used tokens: {token_count}") return CommunityReportsResult( structured_output=res_dict, diff --git a/graphrag/entity_embedding.py b/graphrag/entity_embedding.py index 892d7db39896cc3364825fc436a55c391dcf5194..af7bc2a7ba3ce0d02b5e0f1b80112147312a519b 100644 --- a/graphrag/entity_embedding.py +++ b/graphrag/entity_embedding.py @@ -8,6 +8,7 @@ Reference: from typing import Any import numpy as np import networkx as nx +from dataclasses import dataclass from graphrag.leiden import stable_largest_connected_component diff --git a/graphrag/graph_extractor.py b/graphrag/graph_extractor.py index 0a83454027c3a3514dda6d2adae55282ffe9091d..290390ac9c44b1a8bcc16f3dab55482d27ce55c0 100644 --- a/graphrag/graph_extractor.py +++ b/graphrag/graph_extractor.py @@ -129,9 +129,11 @@ class GraphExtractor: source_doc_map[doc_index] = text all_records[doc_index] = result total_token_count += token_count - if callback: callback(msg=f"{doc_index+1}/{total}, elapsed: {timer() - st}s, used tokens: {total_token_count}") + if callback: + callback(msg=f"{doc_index+1}/{total}, elapsed: {timer() - st}s, used tokens: {total_token_count}") except Exception as e: - if callback: callback(msg="Knowledge graph extraction error:{}".format(str(e))) + if callback: + callback(msg="Knowledge graph extraction error:{}".format(str(e))) logging.exception("error extracting graph") self._on_error( e, @@ -164,7 +166,8 @@ class GraphExtractor: text = perform_variable_replacements(self._extraction_prompt, variables=variables) gen_conf = {"temperature": 0.3} response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf) - if response.find("**ERROR**") >= 0: raise Exception(response) + if 
response.find("**ERROR**") >= 0: + raise Exception(response) token_count = num_tokens_from_string(text + response) results = response or "" @@ -175,7 +178,8 @@ class GraphExtractor: text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables) history.append({"role": "user", "content": text}) response = self._llm.chat("", history, gen_conf) - if response.find("**ERROR**") >=0: raise Exception(response) + if response.find("**ERROR**") >=0: + raise Exception(response) results += response or "" # if this is the final glean, don't bother updating the continuation flag diff --git a/graphrag/index.py b/graphrag/index.py index 89e332cd02948114854114bbeb1524421dc6f887..09c62a271937fefb7a33ed0f40997bbb9110db0d 100644 --- a/graphrag/index.py +++ b/graphrag/index.py @@ -134,7 +134,8 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: list[str], callback, en callback(0.75, "Extracting mind graph.") mindmap = MindMapExtractor(llm_bdl) mg = mindmap(_chunks).output - if not len(mg.keys()): return chunks + if not len(mg.keys()): + return chunks logging.debug(json.dumps(mg, ensure_ascii=False, indent=2)) chunks.append( diff --git a/graphrag/leiden.py b/graphrag/leiden.py index 4c87f085f1764756e5147954136dd34f55a817d3..315e6ff933745689bcb8c898061260921299b7bb 100644 --- a/graphrag/leiden.py +++ b/graphrag/leiden.py @@ -78,7 +78,8 @@ def _compute_leiden_communities( ) -> dict[int, dict[str, int]]: """Return Leiden root communities.""" results: dict[int, dict[str, int]] = {} - if is_empty(graph): return results + if is_empty(graph): + return results if use_lcc: graph = stable_largest_connected_component(graph) @@ -100,7 +101,8 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]: logging.debug( "Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc ) - if not graph.nodes(): return {} + if not graph.nodes(): + return {} node_id_to_community_map = _compute_leiden_communities( graph=graph, @@ -125,9 
+127,11 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]: result[community_id]["nodes"].append(node_id) result[community_id]["weight"] += graph.nodes[node_id].get("rank", 0) * graph.nodes[node_id].get("weight", 1) weights = [comm["weight"] for _, comm in result.items()] - if not weights:continue + if not weights: + continue max_weight = max(weights) - for _, comm in result.items(): comm["weight"] /= max_weight + for _, comm in result.items(): + comm["weight"] /= max_weight return results_by_level diff --git a/intergrations/chatgpt-on-wechat/plugins/__init__.py b/intergrations/chatgpt-on-wechat/plugins/__init__.py index 4b79b693129324968bfb9e99d86f00e3b5e27e49..c1c3a156841ca77c2f2a32eceff0ab78ae69ff22 100644 --- a/intergrations/chatgpt-on-wechat/plugins/__init__.py +++ b/intergrations/chatgpt-on-wechat/plugins/__init__.py @@ -1 +1,5 @@ -from .ragflow_chat import * +from .ragflow_chat import RAGFlowChat + +__all__ = [ + "RAGFlowChat" +] diff --git a/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py b/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py index 17878c540bf07c0da924a7e91955d68c1c687dca..5d5615eaa17536a35f687efb06e4f946d7eedbcc 100644 --- a/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py +++ b/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py @@ -2,7 +2,6 @@ import logging import requests from bridge.context import ContextType # Import Context, ContextType from bridge.reply import Reply, ReplyType # Import Reply, ReplyType -from bridge import * from plugins import Plugin, register # Import Plugin and register from plugins.event import Event, EventContext, EventAction # Import event-related classes diff --git a/rag/app/book.py b/rag/app/book.py index 65de875a1e0ccd56b90d6eccd70d0611662518a6..763364778bc099b5f557d4a0acedb072acbdaa51 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -94,7 +94,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback(0.1, "Start to parse.") txt = 
get_text(filename, binary) sections = txt.split("\n") - sections = [(l, "") for l in sections if l] + sections = [(line, "") for line in sections if line] remove_contents_table(sections, eng=is_english( random_choices([t for t, _ in sections], k=200))) callback(0.8, "Finish parsing.") @@ -102,7 +102,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") sections = HtmlParser()(filename, binary) - sections = [(l, "") for l in sections if l] + sections = [(line, "") for line in sections if line] remove_contents_table(sections, eng=is_english( random_choices([t for t, _ in sections], k=200))) callback(0.8, "Finish parsing.") @@ -112,7 +112,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, binary = BytesIO(binary) doc_parsed = parser.from_buffer(binary) sections = doc_parsed['content'].split('\n') - sections = [(l, "") for l in sections if l] + sections = [(line, "") for line in sections if line] remove_contents_table(sections, eng=is_english( random_choices([t for t, _ in sections], k=200))) callback(0.8, "Finish parsing.") diff --git a/rag/app/email.py b/rag/app/email.py index 5226c78eead8fa86a3bfcbd715ffd9c1ec822ec1..b14ee6d43e04561208dcc78e558dc428e9defd5c 100644 --- a/rag/app/email.py +++ b/rag/app/email.py @@ -75,7 +75,7 @@ def chunk( _add_content(msg, msg.get_content_type()) sections = TxtParser.parser_txt("\n".join(text_txt)) + [ - (l, "") for l in HtmlParser.parser_txt("\n".join(html_txt)) if l + (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line ] st = timer() diff --git a/rag/app/knowledge_graph.py b/rag/app/knowledge_graph.py index 74fbbec1018e7d8f9248a1fcd53a7382671294e3..b252d56153c46e7044f5aa330304335f2b937b56 100644 --- a/rag/app/knowledge_graph.py +++ b/rag/app/knowledge_graph.py @@ -18,7 +18,8 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000, chunks = 
build_knowledge_graph_chunks(tenant_id, sections, callback, parser_config.get("entity_types", ["organization", "person", "location", "event", "time"]) ) - for c in chunks: c["docnm_kwd"] = filename + for c in chunks: + c["docnm_kwd"] = filename doc = { "docnm_kwd": filename, diff --git a/rag/app/laws.py b/rag/app/laws.py index 3ee41c7a7a9299923e07e82a83b6742f03bb1e06..1e3dedfdc1b8a640efef186a0d27aef983ad7554 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -48,7 +48,7 @@ class Docx(DocxParser): continue if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: pn += 1 - return [l for l in lines if l] + return [line for line in lines if line] def __call__(self, filename, binary=None, from_page=0, to_page=100000): self.doc = Document( @@ -60,7 +60,8 @@ class Docx(DocxParser): if pn > to_page: break question_level, p_text = docx_question_level(p, bull) - if not p_text.strip("\n"):continue + if not p_text.strip("\n"): + continue lines.append((question_level, p_text)) for run in p.runs: @@ -78,19 +79,21 @@ class Docx(DocxParser): if lines[e][0] <= lines[s][0]: break e += 1 - if e - s == 1 and visit[s]: continue + if e - s == 1 and visit[s]: + continue sec = [] next_level = lines[s][0] + 1 while not sec and next_level < 22: for i in range(s+1, e): - if lines[i][0] != next_level: continue + if lines[i][0] != next_level: + continue sec.append(lines[i][1]) visit[i] = True next_level += 1 sec.insert(0, lines[s][1]) sections.append("\n".join(sec)) - return [l for l in sections if l] + return [s for s in sections if s] def __str__(self) -> str: return f''' @@ -168,13 +171,13 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback(0.1, "Start to parse.") txt = get_text(filename, binary) sections = txt.split("\n") - sections = [l for l in sections if l] + sections = [s for s in sections if s] callback(0.8, "Finish parsing.") elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") sections = 
HtmlParser()(filename, binary) - sections = [l for l in sections if l] + sections = [s for s in sections if s] callback(0.8, "Finish parsing.") elif re.search(r"\.doc$", filename, re.IGNORECASE): @@ -182,7 +185,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, binary = BytesIO(binary) doc_parsed = parser.from_buffer(binary) sections = doc_parsed['content'].split('\n') - sections = [l for l in sections if l] + sections = [s for s in sections if s] callback(0.8, "Finish parsing.") else: diff --git a/rag/app/manual.py b/rag/app/manual.py index 1ea5e9633d40470f7fb4bcbc9ed8353fa0f6c4cb..49acac1581178b9b1394c07be693bcdf47d4cc7a 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -190,7 +190,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, sections, tbls = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback) if sections and len(sections[0]) < 3: - sections = [(t, l, [[0] * 5]) for t, l in sections] + sections = [(t, lvl, [[0] * 5]) for t, lvl in sections] # set pivot using the most frequent type of title, # then merge between 2 pivot if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1: @@ -211,7 +211,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, else: bull = bullets_category([txt for txt, _, _ in sections]) most_level, levels = title_frequency( - bull, [(txt, l) for txt, l, poss in sections]) + bull, [(txt, lvl) for txt, lvl, _ in sections]) assert len(sections) == len(levels) sec_ids = [] @@ -225,7 +225,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, sections = [(txt, sec_ids[i], poss) for i, (txt, _, poss) in enumerate(sections)] for (img, rows), poss in tbls: - if not rows: continue + if not rows: + continue sections.append((rows if isinstance(rows, str) else rows[0], -1, [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) diff --git a/rag/app/one.py b/rag/app/one.py index 
c7fe0314c2f75a0c4e64918bf87eefbe9927a7c7..bd691f46c3e66f5a4d352fae23c999e91208907d 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -54,7 +54,8 @@ class Pdf(PdfParser): sections = [(b["text"], self.get_position(b, zoomin)) for i, b in enumerate(self.boxes)] for (img, rows), poss in tbls: - if not rows:continue + if not rows: + continue sections.append((rows if isinstance(rows, str) else rows[0], [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) return [(txt, "") for txt, _ in sorted(sections, key=lambda x: ( @@ -109,7 +110,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, binary = BytesIO(binary) doc_parsed = parser.from_buffer(binary) sections = doc_parsed['content'].split('\n') - sections = [l for l in sections if l] + sections = [s for s in sections if s] callback(0.8, "Finish parsing.") else: diff --git a/rag/app/qa.py b/rag/app/qa.py index 0fd7a932b6325c19e6a4edad51f73afb0b340a53..d77daebd6430d1b63542755efe0d867f24ddca65 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -171,7 +171,7 @@ class Pdf(PdfParser): tbl_bottom = tbls[tbl_index][1][0][4] tbl_tag = "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \ .format(tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom) - tbl_text = ''.join(tbls[tbl_index][0][1]) + _tbl_text = ''.join(tbls[tbl_index][0][1]) return tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, @@ -325,9 +325,11 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): txt = get_text(filename, binary) lines = txt.split("\n") comma, tab = 0, 0 - for l in lines: - if len(l.split(",")) == 2: comma += 1 - if len(l.split("\t")) == 2: tab += 1 + for line in lines: + if len(line.split(",")) == 2: + comma += 1 + if len(line.split("\t")) == 2: + tab += 1 delimiter = "\t" if tab >= comma else "," fails = [] @@ -336,18 +338,21 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): while i < len(lines): arr = lines[i].split(delimiter) if len(arr) != 2: - if question: answer += "\n" + 
lines[i] + if question: + answer += "\n" + lines[i] else: fails.append(str(i+1)) elif len(arr) == 2: - if question and answer: res.append(beAdoc(deepcopy(doc), question, answer, eng)) + if question and answer: + res.append(beAdoc(deepcopy(doc), question, answer, eng)) question, answer = arr i += 1 if len(res) % 999 == 0: callback(len(res) * 0.6 / len(lines), ("Extract Q&A: {}".format(len(res)) + ( f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) - if question: res.append(beAdoc(deepcopy(doc), question, answer, eng)) + if question: + res.append(beAdoc(deepcopy(doc), question, answer, eng)) callback(0.6, ("Extract Q&A: {}".format(len(res)) + ( f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) @@ -367,19 +372,18 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): callback(0.1, "Start to parse.") txt = get_text(filename, binary) lines = txt.split("\n") - last_question, last_answer = "", "" + _last_question, last_answer = "", "" question_stack, level_stack = [], [] code_block = False - level_index = [-1] * 7 - for index, l in enumerate(lines): - if l.strip().startswith('```'): + for index, line in enumerate(lines): + if line.strip().startswith('```'): code_block = not code_block question_level, question = 0, '' if not code_block: - question_level, question = mdQuestionLevel(l) + question_level, question = mdQuestionLevel(line) if not question_level or question_level > 6: # not a question - last_answer = f'{last_answer}\n{l}' + last_answer = f'{last_answer}\n{line}' else: # is a question if last_answer.strip(): sum_question = '\n'.join(question_stack) diff --git a/rag/app/table.py b/rag/app/table.py index b5148983ce73bcabeb48d43d66e7951165b12722..d7ba35a6bbd57fbd8189abf1016a67d286a4a944 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -41,14 +41,16 @@ class Excel(ExcelParser): for sheetname in wb.sheetnames: ws = wb[sheetname] rows = list(ws.rows) - if not rows:continue + if 
not rows: + continue headers = [cell.value for cell in rows[0]] missed = set([i for i, h in enumerate(headers) if h is None]) headers = [ cell.value for i, cell in enumerate( rows[0]) if i not in missed] - if not headers:continue + if not headers: + continue data = [] for i, r in enumerate(rows[1:]): rn += 1 @@ -88,7 +90,6 @@ def trans_bool(s): def column_data_type(arr): arr = list(arr) - uni = len(set([a for a in arr if a is not None])) counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0} trans = {t: f for f, t in [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]} @@ -157,7 +158,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, continue if i >= to_page: break - row = [l for l in line.split(kwargs.get("delimiter", "\t"))] + row = [field for field in line.split(kwargs.get("delimiter", "\t"))] if len(row) != len(headers): fails.append(str(i)) continue diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 647b2a909e17600a8286f9c40f3f02e91994e8cd..bc35fe63dbfb2f6f828c62bb2a6e1488bd44cf5c 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -13,12 +13,124 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from .embedding_model import * -from .chat_model import * -from .cv_model import * -from .rerank_model import * -from .sequence2txt_model import * -from .tts_model import * +from .embedding_model import ( + OllamaEmbed, + LocalAIEmbed, + OpenAIEmbed, + AzureEmbed, + XinferenceEmbed, + QWenEmbed, + ZhipuEmbed, + FastEmbed, + YoudaoEmbed, + BaiChuanEmbed, + JinaEmbed, + DefaultEmbedding, + MistralEmbed, + BedrockEmbed, + GeminiEmbed, + NvidiaEmbed, + LmStudioEmbed, + OpenAI_APIEmbed, + CoHereEmbed, + TogetherAIEmbed, + PerfXCloudEmbed, + UpstageEmbed, + SILICONFLOWEmbed, + ReplicateEmbed, + BaiduYiyanEmbed, + VoyageEmbed, + HuggingFaceEmbed, + VolcEngineEmbed, +) +from .chat_model import ( + GptTurbo, + AzureChat, + ZhipuChat, + QWenChat, + OllamaChat, + LocalAIChat, + XinferenceChat, + MoonshotChat, + DeepSeekChat, + VolcEngineChat, + BaiChuanChat, + MiniMaxChat, + MistralChat, + GeminiChat, + BedrockChat, + GroqChat, + OpenRouterChat, + StepFunChat, + NvidiaChat, + LmStudioChat, + OpenAI_APIChat, + CoHereChat, + LeptonAIChat, + TogetherAIChat, + PerfXCloudChat, + UpstageChat, + NovitaAIChat, + SILICONFLOWChat, + YiChat, + ReplicateChat, + HunyuanChat, + SparkChat, + BaiduYiyanChat, + AnthropicChat, + GoogleChat, + HuggingFaceChat, +) + +from .cv_model import ( + GptV4, + AzureGptV4, + OllamaCV, + XinferenceCV, + QWenCV, + Zhipu4V, + LocalCV, + GeminiCV, + OpenRouterCV, + LocalAICV, + NvidiaCV, + LmStudioCV, + StepFunCV, + OpenAI_APICV, + TogetherAICV, + YiCV, + HunyuanCV, +) +from .rerank_model import ( + LocalAIRerank, + DefaultRerank, + JinaRerank, + YoudaoRerank, + XInferenceRerank, + NvidiaRerank, + LmStudioRerank, + OpenAI_APIRerank, + CoHereRerank, + TogetherAIRerank, + SILICONFLOWRerank, + BaiduYiyanRerank, + VoyageRerank, + QWenRerank, +) +from .sequence2txt_model import ( + GPTSeq2txt, + QWenSeq2txt, + AzureSeq2txt, + XinferenceSeq2txt, + TencentCloudSeq2txt, +) +from .tts_model import ( + FishAudioTTS, + QwenTTS, + OpenAITTS, + SparkTTS, + 
XinferenceTTS, +) EmbeddingModel = { "Ollama": OllamaEmbed, @@ -48,7 +160,7 @@ EmbeddingModel = { "BaiduYiyan": BaiduYiyanEmbed, "Voyage AI": VoyageEmbed, "HuggingFace": HuggingFaceEmbed, - "VolcEngine":VolcEngineEmbed, + "VolcEngine": VolcEngineEmbed, } CvModel = { @@ -68,7 +180,7 @@ CvModel = { "OpenAI-API-Compatible": OpenAI_APICV, "TogetherAI": TogetherAICV, "01.AI": YiCV, - "Tencent Hunyuan": HunyuanCV + "Tencent Hunyuan": HunyuanCV, } ChatModel = { @@ -111,7 +223,7 @@ ChatModel = { } RerankModel = { - "LocalAI":LocalAIRerank, + "LocalAI": LocalAIRerank, "BAAI": DefaultRerank, "Jina": JinaRerank, "Youdao": YoudaoRerank, @@ -132,7 +244,7 @@ Seq2txtModel = { "Tongyi-Qianwen": QWenSeq2txt, "Azure-OpenAI": AzureSeq2txt, "Xinference": XinferenceSeq2txt, - "Tencent Cloud": TencentCloudSeq2txt + "Tencent Cloud": TencentCloudSeq2txt, } TTSModel = { diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 26ce3e1489f617dbfcee997de393e3b3c925b3d4..cf038cb433c92e3e03f873d5cbb8dfaa1702e577 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -69,7 +69,8 @@ class Base(ABC): stream=True, **gen_conf) for resp in response: - if not resp.choices: continue + if not resp.choices: + continue if not resp.choices[0].delta.content: resp.choices[0].delta.content = "" ans += resp.choices[0].delta.content @@ -81,7 +82,8 @@ class Base(ABC): ) elif isinstance(resp.usage, dict): total_tokens = resp.usage.get("total_tokens", total_tokens) - else: total_tokens = resp.usage.total_tokens + else: + total_tokens = resp.usage.total_tokens if resp.choices[0].finish_reason == "length": if is_chinese(ans): @@ -98,13 +100,15 @@ class Base(ABC): class GptTurbo(Base): def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"): - if not base_url: base_url = "https://api.openai.com/v1" + if not base_url: + base_url = "https://api.openai.com/v1" super().__init__(key, model_name, base_url) class MoonshotChat(Base): def __init__(self, key, 
model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"): - if not base_url: base_url = "https://api.moonshot.cn/v1" + if not base_url: + base_url = "https://api.moonshot.cn/v1" super().__init__(key, model_name, base_url) @@ -128,7 +132,8 @@ class HuggingFaceChat(Base): class DeepSeekChat(Base): def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1"): - if not base_url: base_url = "https://api.deepseek.com/v1" + if not base_url: + base_url = "https://api.deepseek.com/v1" super().__init__(key, model_name, base_url) @@ -202,7 +207,8 @@ class BaiChuanChat(Base): stream=True, **self._format_params(gen_conf)) for resp in response: - if not resp.choices: continue + if not resp.choices: + continue if not resp.choices[0].delta.content: resp.choices[0].delta.content = "" ans += resp.choices[0].delta.content @@ -313,8 +319,10 @@ class ZhipuChat(Base): if system: history.insert(0, {"role": "system", "content": system}) try: - if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"] + if "presence_penalty" in gen_conf: + del gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + del gen_conf["frequency_penalty"] response = self.client.chat.completions.create( model=self.model_name, messages=history, @@ -333,8 +341,10 @@ class ZhipuChat(Base): def chat_streamly(self, system, history, gen_conf): if system: history.insert(0, {"role": "system", "content": system}) - if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"] + if "presence_penalty" in gen_conf: + del gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + del gen_conf["frequency_penalty"] ans = "" tk_count = 0 try: @@ -345,7 +355,8 @@ class ZhipuChat(Base): **gen_conf ) for resp in response: - if not resp.choices[0].delta.content: continue + if not resp.choices[0].delta.content: + 
continue delta = resp.choices[0].delta.content ans += delta if resp.choices[0].finish_reason == "length": @@ -354,7 +365,8 @@ class ZhipuChat(Base): else: ans += LENGTH_NOTIFICATION_EN tk_count = resp.usage.total_tokens - if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens + if resp.choices[0].finish_reason == "stop": + tk_count = resp.usage.total_tokens yield ans except Exception as e: yield ans + "\n**ERROR**: " + str(e) @@ -372,11 +384,16 @@ class OllamaChat(Base): history.insert(0, {"role": "system", "content": system}) try: options = {} - if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] - if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"] - if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"] + if "temperature" in gen_conf: + options["temperature"] = gen_conf["temperature"] + if "max_tokens" in gen_conf: + options["num_predict"] = gen_conf["max_tokens"] + if "top_p" in gen_conf: + options["top_p"] = gen_conf["top_p"] + if "presence_penalty" in gen_conf: + options["presence_penalty"] = gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + options["frequency_penalty"] = gen_conf["frequency_penalty"] response = self.client.chat( model=self.model_name, messages=history, @@ -392,11 +409,16 @@ class OllamaChat(Base): if system: history.insert(0, {"role": "system", "content": system}) options = {} - if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] - if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"] - if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: 
options["frequency_penalty"] = gen_conf["frequency_penalty"] + if "temperature" in gen_conf: + options["temperature"] = gen_conf["temperature"] + if "max_tokens" in gen_conf: + options["num_predict"] = gen_conf["max_tokens"] + if "top_p" in gen_conf: + options["top_p"] = gen_conf["top_p"] + if "presence_penalty" in gen_conf: + options["presence_penalty"] = gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + options["frequency_penalty"] = gen_conf["frequency_penalty"] ans = "" try: response = self.client.chat( @@ -636,7 +658,8 @@ class MistralChat(Base): messages=history, **gen_conf) for resp in response: - if not resp.choices or not resp.choices[0].delta.content: continue + if not resp.choices or not resp.choices[0].delta.content: + continue ans += resp.choices[0].delta.content total_tokens += 1 if resp.choices[0].finish_reason == "length": @@ -1196,7 +1219,8 @@ class SparkChat(Base): assert model_name in model2version or model_name in version2model, f"The given model name is not supported yet. 
Support: {list(model2version.keys())}" if model_name in model2version: model_version = model2version[model_name] - else: model_version = model_name + else: + model_version = model_name super().__init__(key, model_version, base_url) @@ -1281,8 +1305,10 @@ class AnthropicChat(Base): self.system = system if "max_tokens" not in gen_conf: gen_conf["max_tokens"] = 4096 - if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"] + if "presence_penalty" in gen_conf: + del gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + del gen_conf["frequency_penalty"] ans = "" try: @@ -1312,8 +1338,10 @@ class AnthropicChat(Base): self.system = system if "max_tokens" not in gen_conf: gen_conf["max_tokens"] = 4096 - if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"] + if "presence_penalty" in gen_conf: + del gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + del gen_conf["frequency_penalty"] ans = "" total_tokens = 0 diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index 70e9f24ea5068dea4c8dc5acf42453768bfa7685..48a55b674724d314c48b031a5d715ad949484f1f 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -25,6 +25,7 @@ import base64 from io import BytesIO import json import requests +from transformers import GenerationConfig from rag.nlp import is_english from api.utils import get_uuid @@ -77,14 +78,16 @@ class Base(ABC): stream=True ) for resp in response: - if not resp.choices[0].delta.content: continue + if not resp.choices[0].delta.content: + continue delta = resp.choices[0].delta.content ans += delta if resp.choices[0].finish_reason == "length": ans += "...\nFor the content length reason, it stopped, continue?" if is_english( [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?" 
tk_count = resp.usage.total_tokens - if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens + if resp.choices[0].finish_reason == "stop": + tk_count = resp.usage.total_tokens yield ans except Exception as e: yield ans + "\n**ERROR**: " + str(e) @@ -99,7 +102,7 @@ class Base(ABC): buffered = BytesIO() try: image.save(buffered, format="JPEG") - except Exception as e: + except Exception: image.save(buffered, format="PNG") return base64.b64encode(buffered.getvalue()).decode("utf-8") @@ -139,7 +142,8 @@ class Base(ABC): class GptV4(Base): def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1"): - if not base_url: base_url="https://api.openai.com/v1" + if not base_url: + base_url="https://api.openai.com/v1" self.client = OpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang @@ -149,7 +153,8 @@ class GptV4(Base): prompt = self.prompt(b64) for i in range(len(prompt)): for c in prompt[i]["content"]: - if "text" in c: c["type"] = "text" + if "text" in c: + c["type"] = "text" res = self.client.chat.completions.create( model=self.model_name, @@ -171,7 +176,8 @@ class AzureGptV4(Base): prompt = self.prompt(b64) for i in range(len(prompt)): for c in prompt[i]["content"]: - if "text" in c: c["type"] = "text" + if "text" in c: + c["type"] = "text" res = self.client.chat.completions.create( model=self.model_name, @@ -344,14 +350,16 @@ class Zhipu4V(Base): stream=True ) for resp in response: - if not resp.choices[0].delta.content: continue + if not resp.choices[0].delta.content: + continue delta = resp.choices[0].delta.content ans += delta if resp.choices[0].finish_reason == "length": ans += "...\nFor the content length reason, it stopped, continue?" if is_english( [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?" 
tk_count = resp.usage.total_tokens - if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens + if resp.choices[0].finish_reason == "stop": + tk_count = resp.usage.total_tokens yield ans except Exception as e: yield ans + "\n**ERROR**: " + str(e) @@ -389,11 +397,16 @@ class OllamaCV(Base): if his["role"] == "user": his["images"] = [image] options = {} - if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] - if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"] - if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"] + if "temperature" in gen_conf: + options["temperature"] = gen_conf["temperature"] + if "max_tokens" in gen_conf: + options["num_predict"] = gen_conf["max_tokens"] + if "top_p" in gen_conf: + options["top_k"] = gen_conf["top_p"] + if "presence_penalty" in gen_conf: + options["presence_penalty"] = gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + options["frequency_penalty"] = gen_conf["frequency_penalty"] response = self.client.chat( model=self.model_name, messages=history, @@ -414,11 +427,16 @@ class OllamaCV(Base): if his["role"] == "user": his["images"] = [image] options = {} - if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] - if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"] - if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] - if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"] + if "temperature" in gen_conf: + options["temperature"] = gen_conf["temperature"] + if "max_tokens" in gen_conf: + options["num_predict"] = gen_conf["max_tokens"] + if "top_p" in 
gen_conf: + options["top_k"] = gen_conf["top_p"] + if "presence_penalty" in gen_conf: + options["presence_penalty"] = gen_conf["presence_penalty"] + if "frequency_penalty" in gen_conf: + options["frequency_penalty"] = gen_conf["frequency_penalty"] ans = "" try: response = self.client.chat( @@ -469,7 +487,7 @@ class XinferenceCV(Base): class GeminiCV(Base): def __init__(self, key, model_name="gemini-1.0-pro-vision-latest", lang="Chinese", **kwargs): - from google.generativeai import client, GenerativeModel, GenerationConfig + from google.generativeai import client, GenerativeModel client.configure(api_key=key) _client = client.get_default_generative_client() self.model_name = model_name @@ -503,7 +521,7 @@ class GeminiCV(Base): if his["role"] == "user": his["parts"] = [his["content"]] his.pop("content") - history[-1]["parts"].append(f"data:image/jpeg;base64," + image) + history[-1]["parts"].append("data:image/jpeg;base64," + image) response = self.model.generate_content(history, generation_config=GenerationConfig( max_output_tokens=gen_conf.get("max_tokens", 1000), temperature=gen_conf.get("temperature", 0.3), @@ -519,7 +537,6 @@ class GeminiCV(Base): history[-1]["content"] = system + history[-1]["content"] + "user query: " + history[-1]["content"] ans = "" - tk_count = 0 try: for his in history: if his["role"] == "assistant": @@ -529,14 +546,15 @@ class GeminiCV(Base): if his["role"] == "user": his["parts"] = [his["content"]] his.pop("content") - history[-1]["parts"].append(f"data:image/jpeg;base64," + image) + history[-1]["parts"].append("data:image/jpeg;base64," + image) response = self.model.generate_content(history, generation_config=GenerationConfig( max_output_tokens=gen_conf.get("max_tokens", 1000), temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7)), stream=True) for resp in response: - if not resp.text: continue + if not resp.text: + continue ans += resp.text yield ans except Exception as e: @@ -632,7 +650,8 @@ class 
NvidiaCV(Base): class StepFunCV(GptV4): def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1"): - if not base_url: base_url="https://api.stepfun.com/v1" + if not base_url: + base_url="https://api.stepfun.com/v1" self.client = OpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py index 950ea10ec35bcde8afc5eea370d6e290885c2cdf..73c8f37dff9be62600843aa6c70159d14f04e7eb 100644 --- a/rag/llm/sequence2txt_model.py +++ b/rag/llm/sequence2txt_model.py @@ -15,12 +15,9 @@ # import requests from openai.lib.azure import AzureOpenAI -from zhipuai import ZhipuAI import io from abc import ABC -from ollama import Client from openai import OpenAI -import os import json from rag.utils import num_tokens_from_string import base64 @@ -49,7 +46,8 @@ class Base(ABC): class GPTSeq2txt(Base): def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1"): - if not base_url: base_url = "https://api.openai.com/v1" + if not base_url: + base_url = "https://api.openai.com/v1" self.client = OpenAI(api_key=key, base_url=base_url) self.model_name = model_name diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py index 814a5dfc37f1301e63fc98bc0e5ff8792c39c6d6..3ce0e44874532895da19f02ed03aff0a061640b2 100644 --- a/rag/llm/tts_model.py +++ b/rag/llm/tts_model.py @@ -16,7 +16,6 @@ import _thread as thread import base64 -import datetime import hashlib import hmac import json @@ -175,7 +174,8 @@ class QwenTTS(Base): class OpenAITTS(Base): def __init__(self, key, model_name="tts-1", base_url="https://api.openai.com/v1"): - if not base_url: base_url = "https://api.openai.com/v1" + if not base_url: + base_url = "https://api.openai.com/v1" self.api_key = key self.model_name = model_name self.base_url = base_url diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 
ddca9b580a1f51fca71fc27e649e83fa1d1fb903..52687f276eac9e901e630bac89fcc01681d54c1e 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -222,7 +222,8 @@ def bullets_category(sections): def is_english(texts): eng = 0 - if not texts: return False + if not texts: + return False for t in texts: if re.match(r"[ `a-zA-Z.,':;/\"?<>!\(\)-]", t.strip()): eng += 1 @@ -250,7 +251,8 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None): res = [] # wrap up as es documents for ck in chunks: - if len(ck.strip()) == 0:continue + if len(ck.strip()) == 0: + continue logging.debug("-- {}".format(ck)) d = copy.deepcopy(doc) if pdf_parser: @@ -269,7 +271,8 @@ def tokenize_chunks_docx(chunks, doc, eng, images): res = [] # wrap up as es documents for ck, image in zip(chunks, images): - if len(ck.strip()) == 0:continue + if len(ck.strip()) == 0: + continue logging.debug("-- {}".format(ck)) d = copy.deepcopy(doc) d["image"] = image @@ -288,8 +291,10 @@ def tokenize_table(tbls, doc, eng, batch_size=10): d = copy.deepcopy(doc) tokenize(d, rows, eng) d["content_with_weight"] = rows - if img: d["image"] = img - if poss: add_positions(d, poss) + if img: + d["image"] = img + if poss: + add_positions(d, poss) res.append(d) continue de = "; " if eng else "; " @@ -387,9 +392,9 @@ def title_frequency(bull, sections): if re.search(r"(title|head)", layout) and not not_title(txt.split("@")[0]): levels[i] = bullets_size most_level = bullets_size+1 - for l, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1): - if l <= bullets_size: - most_level = l + for level, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1): + if level <= bullets_size: + most_level = level break return most_level, levels @@ -504,7 +509,8 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"): def add_chunk(t, pos): nonlocal cks, tk_nums, delimiter tnum = num_tokens_from_string(t) - if not pos: pos = "" + if not pos: + pos = "" if tnum < 8: pos = "" # Ensure that the length of the merged 
chunk does not exceed chunk_token_num diff --git a/rag/nlp/query.py b/rag/nlp/query.py index 6c018d2ca526c893e802224eab964c47c520d5aa..11e3f502aed60ffc99c6105926461621850dbb4f 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -121,7 +121,8 @@ class FulltextQueryer: keywords.append(tt) twts = self.tw.weights([tt]) syns = self.syn.lookup(tt) - if syns and len(keywords) < 32: keywords.extend(syns) + if syns and len(keywords) < 32: + keywords.extend(syns) logging.debug(json.dumps(twts, ensure_ascii=False)) tms = [] for tk, w in sorted(twts, key=lambda x: x[1] * -1): @@ -147,7 +148,8 @@ class FulltextQueryer: tk_syns = self.syn.lookup(tk) tk_syns = [FulltextQueryer.subSpecialChar(s) for s in tk_syns] - if len(keywords) < 32: keywords.extend([s for s in tk_syns if s]) + if len(keywords) < 32: + keywords.extend([s for s in tk_syns if s]) tk_syns = [rag_tokenizer.fine_grained_tokenize(s) for s in tk_syns if s] tk_syns = [f"\"{s}\"" if s.find(" ")>0 else s for s in tk_syns] diff --git a/rag/nlp/rag_tokenizer.py b/rag/nlp/rag_tokenizer.py index 0815daaeeef1bc6f3533d7b581c7c41781b2141b..953d940ee1c747490885e1a700f301f93addc407 100644 --- a/rag/nlp/rag_tokenizer.py +++ b/rag/nlp/rag_tokenizer.py @@ -104,7 +104,6 @@ class RagTokenizer: return HanziConv.toSimplified(line) def dfs_(self, chars, s, preTks, tkslist): - MAX_L = 10 res = s # if s > MAX_L or s>= len(chars): if s >= len(chars): @@ -184,12 +183,6 @@ class RagTokenizer: return sorted(res, key=lambda x: x[1], reverse=True) def merge_(self, tks): - patts = [ - (r"[ ]+", " "), - (r"([0-9\+\.,%\*=-]) ([0-9\+\.,%\*=-])", r"\1\2"), - ] - # for p,s in patts: tks = re.sub(p, s, tks) - # if split chars is part of token res = [] tks = re.sub(r"[ ]+", " ", tks).split() @@ -284,7 +277,8 @@ class RagTokenizer: same = 0 while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]: same += 1 - if same > 0: res.append(" ".join(tks[j: j + same])) + if same > 0: + res.append(" ".join(tks[j: j + same])) _i = 
i + same _j = j + same j = _j + 1 diff --git a/rag/nlp/term_weight.py b/rag/nlp/term_weight.py index bbf446ddfb63644b4b609e712225d0bc39529299..6ab49a2e3c950e7cdb8a0c3c35f915489245dbac 100644 --- a/rag/nlp/term_weight.py +++ b/rag/nlp/term_weight.py @@ -62,10 +62,10 @@ class Dealer: res = {} f = open(fnm, "r") while True: - l = f.readline() - if not l: + line = f.readline() + if not line: break - arr = l.replace("\n", "").split("\t") + arr = line.replace("\n", "").split("\t") if len(arr) < 2: res[arr[0]] = 0 else: diff --git a/rag/raptor.py b/rag/raptor.py index 51f1ad1177d20cad6105d15e776ea93f13b59319..6e11cf683e57c56bcc6777451ce8e55db4d7cfe5 100644 --- a/rag/raptor.py +++ b/rag/raptor.py @@ -47,7 +47,8 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: def __call__(self, chunks, random_state, callback=None): layers = [(0, len(chunks))] start, end = 0, len(chunks) - if len(chunks) <= 1: return + if len(chunks) <= 1: + return chunks = [(s, a) for s, a in chunks if len(a) > 0] def summarize(ck_idx, lock): @@ -66,7 +67,8 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: logging.debug(f"SUM: {cnt}") embds, _ = self._embd_model.encode([cnt]) with lock: - if not len(embds[0]): return + if not len(embds[0]): + return chunks.append((cnt, embds[0])) except Exception as e: logging.exception("summarize got exception") diff --git a/rag/svr/cache_file_svr.py b/rag/svr/cache_file_svr.py index 98769d3b53261eb6db46ea85f491677466020cc6..8b96a2af5b7d9e4b843c643c23bd23d7c6f0892d 100644 --- a/rag/svr/cache_file_svr.py +++ b/rag/svr/cache_file_svr.py @@ -33,14 +33,16 @@ def collect(): def main(): locations = collect() - if not locations:return + if not locations: + return logging.info(f"TASKS: {len(locations)}") for kb_id, loc in locations: try: if REDIS_CONN.is_alive(): try: key = "{}/{}".format(kb_id, loc) - if REDIS_CONN.exist(key):continue + if REDIS_CONN.exist(key): + continue file_bin = STORAGE_IMPL.get(kb_id, loc) REDIS_CONN.transaction(key, file_bin, 
12 * 60) logging.info("CACHE: {}".format(loc)) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 7ab4b72540018620403ad4f1584cae3a56be0d00..aed8b7fa4a3a56276235047d183223ffe8e50e84 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -23,18 +23,12 @@ import os from api.utils.log_utils import initRootLogger -CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1] -CONSUMER_NAME = "task_executor_" + CONSUMER_NO -LOG_LEVELS = os.environ.get("LOG_LEVELS", "") -initRootLogger(CONSUMER_NAME, LOG_LEVELS) from datetime import datetime import json -import os import hashlib import copy import re -import sys import time import threading from functools import partial @@ -63,6 +57,11 @@ from rag.utils import rmSpace, num_tokens_from_string from rag.utils.redis_conn import REDIS_CONN, Payload from rag.utils.storage_factory import STORAGE_IMPL +CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1] +CONSUMER_NAME = "task_executor_" + CONSUMER_NO +LOG_LEVELS = os.environ.get("LOG_LEVELS", "") +initRootLogger(CONSUMER_NAME, LOG_LEVELS) + BATCH_SIZE = 64 FACTORY = { @@ -201,7 +200,8 @@ def build_chunks(task, progress_callback): "doc_id": task["doc_id"], "kb_id": str(task["kb_id"]) } - if task["pagerank"]: doc["pagerank_fea"] = int(task["pagerank"]) + if task["pagerank"]: + doc["pagerank_fea"] = int(task["pagerank"]) el = 0 for ck in cks: d = copy.deepcopy(doc) @@ -342,7 +342,8 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None): "docnm_kwd": row["name"], "title_tks": rag_tokenizer.tokenize(row["name"]) } - if row["pagerank"]: doc["pagerank_fea"] = int(row["pagerank"]) + if row["pagerank"]: + doc["pagerank_fea"] = int(row["pagerank"]) res = [] tk_count = 0 for content, vctr in chunks[original_length:]: diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py index d75fb69c3deb79692e2eb00bafcbd6867d296403..e68e437354f691b91b0b2baf0d2019796b81974f 100644 --- a/rag/utils/__init__.py +++ b/rag/utils/__init__.py @@ -41,15 +41,15 @@ def 
findMaxDt(fnm): try: with open(fnm, "r") as f: while True: - l = f.readline() - if not l: + line = f.readline() + if not line: break - l = l.strip("\n") - if l == 'nan': + line = line.strip("\n") + if line == 'nan': continue - if l > m: - m = l - except Exception as e: + if line > m: + m = line + except Exception: pass return m @@ -59,15 +59,15 @@ def findMaxTm(fnm): try: with open(fnm, "r") as f: while True: - l = f.readline() - if not l: + line = f.readline() + if not line: break - l = l.strip("\n") - if l == 'nan': + line = line.strip("\n") + if line == 'nan': continue - if int(l) > m: - m = int(l) - except Exception as e: + if int(line) > m: + m = int(line) + except Exception: pass return m diff --git a/rag/utils/azure_sas_conn.py b/rag/utils/azure_sas_conn.py index 275f7fe6b18c07fa4c5d4dbd897d8624a60c3d3f..8c1be4cd05786614a2c95a056734d1d70278687f 100644 --- a/rag/utils/azure_sas_conn.py +++ b/rag/utils/azure_sas_conn.py @@ -32,7 +32,7 @@ class RAGFlowAzureSasBlob(object): self.conn = None def health(self): - bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" + _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary)) def put(self, bucket, fnm, binary): diff --git a/rag/utils/azure_spn_conn.py b/rag/utils/azure_spn_conn.py index 7081f892d1a7408d28eaa040db9eb7cff3150ce1..c45ceb79527d333540d559df157f712be4c26d44 100644 --- a/rag/utils/azure_spn_conn.py +++ b/rag/utils/azure_spn_conn.py @@ -36,7 +36,7 @@ class RAGFlowAzureSpnBlob(object): self.conn = None def health(self): - bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" + _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" f = self.conn.create_file(fnm) f.append_data(binary, offset=0, length=len(binary)) return f.flush_data(len(binary)) diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py index 059d4f00f295fa431c50f862b3aef3a8be0e3e96..a473833f4572d95904546d84aeca37c9865a4f74 100644 --- 
a/rag/utils/es_conn.py +++ b/rag/utils/es_conn.py @@ -132,7 +132,8 @@ class ESConnection(DocStoreConnection): bqry.filter.append( Q("bool", must_not=Q("range", available_int={"lt": 1}))) continue - if not v: continue + if not v: + continue if isinstance(v, list): bqry.filter.append(Q("terms", **{k: v})) elif isinstance(v, str) or isinstance(v, int): diff --git a/sdk/python/ragflow_sdk/__init__.py b/sdk/python/ragflow_sdk/__init__.py index a99e97c6beabbf19635ff392915ac1d91b215d93..f8df6aaf056df12727d90284659fd44a51b337df 100644 --- a/sdk/python/ragflow_sdk/__init__.py +++ b/sdk/python/ragflow_sdk/__init__.py @@ -1,14 +1,21 @@ -from beartype.claw import beartype_this_package -beartype_this_package() # <-- raise exceptions in your code - import importlib.metadata -__version__ = importlib.metadata.version("ragflow_sdk") - from .ragflow import RAGFlow from .modules.dataset import DataSet from .modules.chat import Chat from .modules.session import Session from .modules.document import Document from .modules.chunk import Chunk -from .modules.agent import Agent \ No newline at end of file +from .modules.agent import Agent + +__version__ = importlib.metadata.version("ragflow_sdk") + +__all__ = [ + "RAGFlow", + "DataSet", + "Chat", + "Session", + "Document", + "Chunk", + "Agent" +] \ No newline at end of file diff --git a/sdk/python/ragflow_sdk/modules/session.py b/sdk/python/ragflow_sdk/modules/session.py index f5c2072ee5f62ceea79942038a896f0e0821a242..539c1ce89bad80c0cb449346e956359b25623c5e 100644 --- a/sdk/python/ragflow_sdk/modules/session.py +++ b/sdk/python/ragflow_sdk/modules/session.py @@ -29,7 +29,7 @@ class Session(Base): raise Exception(json_data["message"]) if line.startswith("data:"): json_data = json.loads(line[5:]) - if json_data["data"] != True: + if not json_data["data"]: answer = json_data["data"]["answer"] reference = json_data["data"]["reference"] temp_dict = { diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index 
cd4df05f3837290c02068cf2f165034fc2c4ebe3..463d0a8813396f3431df0da4fb060d06e5d75266 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -1,5 +1,3 @@ -import string -import random import os import pytest import requests diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py index 4e98357635be205445873d4c19ac48d696d868d9..1a8c174c53da4f9bb9757f4876257416e610e816 100644 --- a/sdk/python/test/test_frontend_api/common.py +++ b/sdk/python/test/test_frontend_api/common.py @@ -39,7 +39,6 @@ def update_dataset(auth, json_req): def upload_file(auth, dataset_id, path): authorization = {"Authorization": auth} url = f"{HOST_ADDRESS}/v1/document/upload" - base_name = os.path.basename(path) json_req = { "kb_id": dataset_id, } diff --git a/sdk/python/test/test_frontend_api/get_email.py b/sdk/python/test/test_frontend_api/get_email.py index df053fa768b636e0666c177036b10c5afbe5edcc..923b82d2d21ad75ebe0fd0556a47fd1e65180a67 100644 --- a/sdk/python/test/test_frontend_api/get_email.py +++ b/sdk/python/test/test_frontend_api/get_email.py @@ -1,3 +1,3 @@ def test_get_email(get_email): - print(f"\nEmail account:",flush=True) + print("\nEmail account:",flush=True) print(f"{get_email}\n",flush=True) \ No newline at end of file diff --git a/sdk/python/test/test_frontend_api/test_chunk.py b/sdk/python/test/test_frontend_api/test_chunk.py index 555b9360198a78bd09c2a5c0a92509b8cc69d4cc..afcab865d5a64eca9d9a0507d069d1b7f02e2231 100644 --- a/sdk/python/test/test_frontend_api/test_chunk.py +++ b/sdk/python/test/test_frontend_api/test_chunk.py @@ -13,14 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, upload_file, DATASET_NAME_LIMIT +from common import create_dataset, list_dataset, rm_dataset, upload_file from common import list_document, get_docs_info, parse_docs from time import sleep from timeit import default_timer as timer -import re -import pytest -import random -import string def test_parse_txt_document(get_auth): diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index 8de82282935c2a8b7780296de0ec58a6875dfb03..fe2de715a8c4f72c59bee6b7a65bef1c49360e6e 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -1,6 +1,5 @@ -from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT +from common import create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT import re -import pytest import random import string @@ -33,8 +32,6 @@ def test_dataset(get_auth): def test_dataset_1k_dataset(get_auth): # create dataset - authorization = {"Authorization": get_auth} - url = f"{HOST_ADDRESS}/v1/kb/create" for i in range(1000): res = create_dataset(get_auth, f"test_create_dataset_{i}") assert res.get("code") == 0, f"{res.get('message')}" @@ -76,7 +73,7 @@ def test_duplicated_name_dataset(get_auth): dataset_id = item.get("id") dataset_list.append(dataset_id) match = re.match(pattern, dataset_name) - assert match != None + assert match is not None for dataset_id in dataset_list: res = rm_dataset(get_auth, dataset_id) diff --git a/sdk/python/test/test_sdk_api/get_email.py b/sdk/python/test/test_sdk_api/get_email.py index df053fa768b636e0666c177036b10c5afbe5edcc..923b82d2d21ad75ebe0fd0556a47fd1e65180a67 100644 --- a/sdk/python/test/test_sdk_api/get_email.py +++ b/sdk/python/test/test_sdk_api/get_email.py @@ -1,3 +1,3 @@ def test_get_email(get_email): - print(f"\nEmail account:",flush=True) 
+ print("\nEmail account:",flush=True) print(f"{get_email}\n",flush=True) \ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/t_agent.py b/sdk/python/test/test_sdk_api/t_agent.py index 2c092e85a1c47cf9e4d82ee196661ec9914e2ba1..f9f29c7e32d10ce912b04c3cbea460edfb80427a 100644 --- a/sdk/python/test/test_sdk_api/t_agent.py +++ b/sdk/python/test/test_sdk_api/t_agent.py @@ -1,4 +1,4 @@ -from ragflow_sdk import RAGFlow,Agent +from ragflow_sdk import RAGFlow from common import HOST_ADDRESS import pytest
{sheetname}