diff --git a/agent/canvas.py b/agent/canvas.py
index 41410e64e3d8531519130f5ea8a5451f67bd59c8..bfffabd59ce972d35f8c9ccdc0d00d03feb45ed3 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -133,7 +133,8 @@ class Canvas(ABC):
"components": {}
}
for k in self.dsl.keys():
- if k in ["components"]:continue
+ if k in ["components"]:
+ continue
dsl[k] = deepcopy(self.dsl[k])
for k, cpn in self.components.items():
@@ -158,7 +159,8 @@ class Canvas(ABC):
def get_compnent_name(self, cid):
for n in self.dsl["graph"]["nodes"]:
- if cid == n["id"]: return n["data"]["name"]
+ if cid == n["id"]:
+ return n["data"]["name"]
return ""
def run(self, **kwargs):
@@ -173,7 +175,8 @@ class Canvas(ABC):
if kwargs.get("stream"):
for an in ans():
yield an
- else: yield ans
+ else:
+ yield ans
return
if not self.path:
@@ -188,7 +191,8 @@ class Canvas(ABC):
def prepare2run(cpns):
nonlocal ran, ans
for c in cpns:
- if self.path[-1] and c == self.path[-1][-1]: continue
+ if self.path[-1] and c == self.path[-1][-1]:
+ continue
cpn = self.components[c]["obj"]
if cpn.component_name == "Answer":
self.answer.append(c)
@@ -197,7 +201,8 @@ class Canvas(ABC):
if c not in without_dependent_checking:
cpids = cpn.get_dependent_components()
if any([cc not in self.path[-1] for cc in cpids]):
- if c not in waiting: waiting.append(c)
+ if c not in waiting:
+ waiting.append(c)
continue
yield "*'{}'* is running...🕞".format(self.get_compnent_name(c))
ans = cpn.run(self.history, **kwargs)
@@ -211,10 +216,12 @@ class Canvas(ABC):
logging.debug(f"Canvas.run: {ran} {self.path}")
cpn_id = self.path[-1][ran]
cpn = self.get_component(cpn_id)
- if not cpn["downstream"]: break
+ if not cpn["downstream"]:
+ break
loop = self._find_loop()
- if loop: raise OverflowError(f"Too much loops: {loop}")
+ if loop:
+ raise OverflowError(f"Too much loops: {loop}")
if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
switch_out = cpn["obj"].output()[1].iloc[0, 0]
@@ -283,19 +290,22 @@ class Canvas(ABC):
def _find_loop(self, max_loops=6):
path = self.path[-1][::-1]
- if len(path) < 2: return False
+ if len(path) < 2:
+ return False
for i in range(len(path)):
if path[i].lower().find("answer") >= 0:
path = path[:i]
break
- if len(path) < 2: return False
+ if len(path) < 2:
+ return False
- for l in range(2, len(path) // 2):
- pat = ",".join(path[0:l])
+ for loc in range(2, len(path) // 2):
+ pat = ",".join(path[0:loc])
path_str = ",".join(path)
- if len(pat) >= len(path_str): return False
+ if len(pat) >= len(path_str):
+ return False
loop = max_loops
while path_str.find(pat) == 0 and loop >= 0:
loop -= 1
@@ -303,7 +313,7 @@ class Canvas(ABC):
return False
path_str = path_str[len(pat)+1:]
if loop < 0:
- pat = " => ".join([p.split(":")[0] for p in path[0:l]])
+ pat = " => ".join([p.split(":")[0] for p in path[0:loc]])
return pat + " => " + pat
return False
diff --git a/agent/component/__init__.py b/agent/component/__init__.py
index 9fa77288bac65af0ff8f2c847d0bc74c8a598761..2bb8669f472c82bc874003f1364ca20e6cb39fba 100644
--- a/agent/component/__init__.py
+++ b/agent/component/__init__.py
@@ -39,3 +39,73 @@ def component_class(class_name):
m = importlib.import_module("agent.component")
c = getattr(m, class_name)
return c
+
+__all__ = [
+ "Begin",
+ "BeginParam",
+ "Generate",
+ "GenerateParam",
+ "Retrieval",
+ "RetrievalParam",
+ "Answer",
+ "AnswerParam",
+ "Categorize",
+ "CategorizeParam",
+ "Switch",
+ "SwitchParam",
+ "Relevant",
+ "RelevantParam",
+ "Message",
+ "MessageParam",
+ "RewriteQuestion",
+ "RewriteQuestionParam",
+ "KeywordExtract",
+ "KeywordExtractParam",
+ "Concentrator",
+ "ConcentratorParam",
+ "Baidu",
+ "BaiduParam",
+ "DuckDuckGo",
+ "DuckDuckGoParam",
+ "Wikipedia",
+ "WikipediaParam",
+ "PubMed",
+ "PubMedParam",
+ "ArXiv",
+ "ArXivParam",
+ "Google",
+ "GoogleParam",
+ "Bing",
+ "BingParam",
+ "GoogleScholar",
+ "GoogleScholarParam",
+ "DeepL",
+ "DeepLParam",
+ "GitHub",
+ "GitHubParam",
+ "BaiduFanyi",
+ "BaiduFanyiParam",
+ "QWeather",
+ "QWeatherParam",
+ "ExeSQL",
+ "ExeSQLParam",
+ "YahooFinance",
+ "YahooFinanceParam",
+ "WenCai",
+ "WenCaiParam",
+ "Jin10",
+ "Jin10Param",
+ "TuShare",
+ "TuShareParam",
+ "AkShare",
+ "AkShareParam",
+ "Crawler",
+ "CrawlerParam",
+ "Invoke",
+ "InvokeParam",
+ "Template",
+ "TemplateParam",
+ "Email",
+ "EmailParam",
+ "component_class"
+]
diff --git a/agent/component/base.py b/agent/component/base.py
index 5825eba582ef3f76299c7d91f5327e8fb01b8a14..2660be7d37362e6ef2e7e656613de052d4b3d79e 100644
--- a/agent/component/base.py
+++ b/agent/component/base.py
@@ -428,7 +428,8 @@ class ComponentBase(ABC):
def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
o = getattr(self._param, self._param.output_var_name)
if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
- if not isinstance(o, list): o = [o]
+ if not isinstance(o, list):
+ o = [o]
o = pd.DataFrame(o)
if allow_partial or not isinstance(o, partial):
@@ -440,7 +441,8 @@ class ComponentBase(ABC):
for oo in o():
if not isinstance(oo, pd.DataFrame):
outs = pd.DataFrame(oo if isinstance(oo, list) else [oo])
- else: outs = oo
+ else:
+ outs = oo
return self._param.output_var_name, outs
def reset(self):
@@ -482,13 +484,15 @@ class ComponentBase(ABC):
outs.append(pd.DataFrame([{"content": q["value"]}]))
if outs:
df = pd.concat(outs, ignore_index=True)
- if "content" in df: df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
+ if "content" in df:
+ df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
return df
upstream_outs = []
for u in reversed_cpnts[::-1]:
- if self.get_component_name(u) in ["switch", "concentrator"]: continue
+ if self.get_component_name(u) in ["switch", "concentrator"]:
+ continue
if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1]
if o is not None:
@@ -532,7 +536,8 @@ class ComponentBase(ABC):
reversed_cpnts.extend(self._canvas.path[-1])
for u in reversed_cpnts[::-1]:
- if self.get_component_name(u) in ["switch", "answer"]: continue
+ if self.get_component_name(u) in ["switch", "answer"]:
+ continue
return self._canvas.get_component(u)["obj"].output()[1]
@staticmethod
diff --git a/agent/component/categorize.py b/agent/component/categorize.py
index 94f10c799163747e18a80aef3849302665869574..7b264f131fff9e25950a05b1f9b8b391cf67d06a 100644
--- a/agent/component/categorize.py
+++ b/agent/component/categorize.py
@@ -34,15 +34,18 @@ class CategorizeParam(GenerateParam):
super().check()
self.check_empty(self.category_description, "[Categorize] Category examples")
for k, v in self.category_description.items():
- if not k: raise ValueError("[Categorize] Category name can not be empty!")
- if not v.get("to"): raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
+ if not k:
+ raise ValueError("[Categorize] Category name can not be empty!")
+ if not v.get("to"):
+ raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
def get_prompt(self):
cate_lines = []
for c, desc in self.category_description.items():
- for l in desc.get("examples", "").split("\n"):
- if not l: continue
- cate_lines.append("Question: {}\tCategory: {}".format(l, c))
+ for line in desc.get("examples", "").split("\n"):
+ if not line:
+ continue
+ cate_lines.append("Question: {}\tCategory: {}".format(line, c))
descriptions = []
for c, desc in self.category_description.items():
if desc.get("description"):
diff --git a/agent/component/deepl.py b/agent/component/deepl.py
index d5247735a81e2a8aee7adbad2d36e887fab76768..31e92729c37481ec841fc482cd488450b85c373a 100644
--- a/agent/component/deepl.py
+++ b/agent/component/deepl.py
@@ -14,7 +14,6 @@
# limitations under the License.
#
from abc import ABC
-import re
from agent.component.base import ComponentBase, ComponentParamBase
import deepl
diff --git a/agent/component/exesql.py b/agent/component/exesql.py
index eac305e8b1b10cf6d82b08ec206fb597c0051a33..e73a393ed0477b61426bfea6a98f0caf77221f3a 100644
--- a/agent/component/exesql.py
+++ b/agent/component/exesql.py
@@ -46,8 +46,10 @@ class ExeSQLParam(ComponentParamBase):
self.check_empty(self.password, "Database password")
self.check_positive_integer(self.top_n, "Number of records")
if self.database == "rag_flow":
- if self.host == "ragflow-mysql": raise ValueError("The host is not accessible.")
- if self.password == "infini_rag_flow": raise ValueError("The host is not accessible.")
+ if self.host == "ragflow-mysql":
+ raise ValueError("The host is not accessible.")
+ if self.password == "infini_rag_flow":
+ raise ValueError("The host is not accessible.")
class ExeSQL(ComponentBase, ABC):
diff --git a/agent/component/generate.py b/agent/component/generate.py
index 555ca6b0249709d502addd3aa7f92d944747b57d..27f1ce2fdfb0616fb7a1f88b15ce1e1fd303bb39 100644
--- a/agent/component/generate.py
+++ b/agent/component/generate.py
@@ -51,11 +51,16 @@ class GenerateParam(ComponentParamBase):
def gen_conf(self):
conf = {}
- if self.max_tokens > 0: conf["max_tokens"] = self.max_tokens
- if self.temperature > 0: conf["temperature"] = self.temperature
- if self.top_p > 0: conf["top_p"] = self.top_p
- if self.presence_penalty > 0: conf["presence_penalty"] = self.presence_penalty
- if self.frequency_penalty > 0: conf["frequency_penalty"] = self.frequency_penalty
+ if self.max_tokens > 0:
+ conf["max_tokens"] = self.max_tokens
+ if self.temperature > 0:
+ conf["temperature"] = self.temperature
+ if self.top_p > 0:
+ conf["top_p"] = self.top_p
+ if self.presence_penalty > 0:
+ conf["presence_penalty"] = self.presence_penalty
+ if self.frequency_penalty > 0:
+ conf["frequency_penalty"] = self.frequency_penalty
return conf
@@ -83,7 +88,8 @@ class Generate(ComponentBase):
recall_docs = []
for i in idx:
did = retrieval_res.loc[int(i), "doc_id"]
- if did in doc_ids: continue
+ if did in doc_ids:
+ continue
doc_ids.add(did)
recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
@@ -108,7 +114,8 @@ class Generate(ComponentBase):
retrieval_res = []
self._param.inputs = []
for para in self._param.parameters:
- if not para.get("component_id"): continue
+ if not para.get("component_id"):
+ continue
component_id = para["component_id"].split("@")[0]
if para["component_id"].lower().find("@") >= 0:
cpn_id, key = para["component_id"].split("@")
@@ -142,7 +149,8 @@ class Generate(ComponentBase):
if retrieval_res:
retrieval_res = pd.concat(retrieval_res, ignore_index=True)
- else: retrieval_res = pd.DataFrame([])
+ else:
+ retrieval_res = pd.DataFrame([])
for n, v in kwargs.items():
prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
@@ -164,9 +172,11 @@ class Generate(ComponentBase):
return pd.DataFrame([res])
msg = self._canvas.get_history(self._param.message_history_window_size)
- if len(msg) < 1: msg.append({"role": "user", "content": ""})
+ if len(msg) < 1:
+ msg.append({"role": "user", "content": ""})
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
- if len(msg) < 2: msg.append({"role": "user", "content": ""})
+ if len(msg) < 2:
+ msg.append({"role": "user", "content": ""})
ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
@@ -185,9 +195,11 @@ class Generate(ComponentBase):
return
msg = self._canvas.get_history(self._param.message_history_window_size)
- if len(msg) < 1: msg.append({"role": "user", "content": ""})
+ if len(msg) < 1:
+ msg.append({"role": "user", "content": ""})
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
- if len(msg) < 2: msg.append({"role": "user", "content": ""})
+ if len(msg) < 2:
+ msg.append({"role": "user", "content": ""})
answer = ""
for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
res = {"content": ans, "reference": []}
diff --git a/agent/component/rewrite.py b/agent/component/rewrite.py
index 4257a81fda0188cd023979e879af65edc4fac26b..31390c6f6748fe2cb98272f1c368793b127c7747 100644
--- a/agent/component/rewrite.py
+++ b/agent/component/rewrite.py
@@ -95,7 +95,8 @@ class RewriteQuestion(Generate, ABC):
hist = self._canvas.get_history(4)
conv = []
for m in hist:
- if m["role"] not in ["user", "assistant"]: continue
+ if m["role"] not in ["user", "assistant"]:
+ continue
conv.append("{}: {}".format(m["role"].upper(), m["content"]))
conv = "\n".join(conv)
diff --git a/agent/component/switch.py b/agent/component/switch.py
index 05daaf96fa27adc190c9e24f00f91b0c467e832b..01affe0c6c8d6ab89a3d23361638a1ec9e1c54da 100644
--- a/agent/component/switch.py
+++ b/agent/component/switch.py
@@ -41,7 +41,8 @@ class SwitchParam(ComponentParamBase):
def check(self):
self.check_empty(self.conditions, "[Switch] conditions")
for cond in self.conditions:
- if not cond["to"]: raise ValueError(f"[Switch] 'To' can not be empty!")
+ if not cond["to"]:
+ raise ValueError("[Switch] 'To' can not be empty!")
class Switch(ComponentBase, ABC):
@@ -51,7 +52,8 @@ class Switch(ComponentBase, ABC):
res = []
for cond in self._param.conditions:
for item in cond["items"]:
- if not item["cpn_id"]: continue
+ if not item["cpn_id"]:
+ continue
if item["cpn_id"].find("begin") >= 0:
continue
cid = item["cpn_id"].split("@")[0]
@@ -63,7 +65,8 @@ class Switch(ComponentBase, ABC):
for cond in self._param.conditions:
res = []
for item in cond["items"]:
- if not item["cpn_id"]:continue
+ if not item["cpn_id"]:
+ continue
cid = item["cpn_id"].split("@")[0]
if item["cpn_id"].find("@") > 0:
cpn_id, key = item["cpn_id"].split("@")
@@ -107,22 +110,22 @@ class Switch(ComponentBase, ABC):
elif operator == ">":
try:
return True if float(input) > float(value) else False
- except Exception as e:
+ except Exception:
return True if input > value else False
elif operator == "<":
try:
return True if float(input) < float(value) else False
- except Exception as e:
+ except Exception:
return True if input < value else False
elif operator == "≥":
try:
return True if float(input) >= float(value) else False
- except Exception as e:
+ except Exception:
return True if input >= value else False
elif operator == "≤":
try:
return True if float(input) <= float(value) else False
- except Exception as e:
+ except Exception:
return True if input <= value else False
raise ValueError('Not supported operator' + operator)
\ No newline at end of file
diff --git a/agent/component/template.py b/agent/component/template.py
index 8964752941c366b34fcae68102bc567f15351e32..140688048b74b5ad150bc5033c3bf1fc490aea31 100644
--- a/agent/component/template.py
+++ b/agent/component/template.py
@@ -47,7 +47,8 @@ class Template(ComponentBase):
self._param.inputs = []
for para in self._param.parameters:
- if not para.get("component_id"): continue
+ if not para.get("component_id"):
+ continue
component_id = para["component_id"].split("@")[0]
if para["component_id"].lower().find("@") >= 0:
cpn_id, key = para["component_id"].split("@")
diff --git a/agent/test/client.py b/agent/test/client.py
index be9115290cf45c8d2d73152b745fb978c92793d3..1ab4db386ee0279f6f31a2cf612c16f9ecf59636 100644
--- a/agent/test/client.py
+++ b/agent/test/client.py
@@ -43,6 +43,7 @@ if __name__ == '__main__':
else:
print(ans["content"])
- if DEBUG: print(canvas.path)
+ if DEBUG:
+ print(canvas.path)
question = input("\n==================== User =====================\n> ")
canvas.add_user_input(question)
diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index 3e8520a682c8f1b7de3a1b1553fceeafc7aa7065..3f9793b1028bc71c2c9dc236667f1856d291ed68 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -142,7 +142,6 @@ def set_conversation():
if not objs:
return get_json_result(
data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
- req = request.json
try:
if objs[0].source == "agent":
e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
@@ -188,7 +187,8 @@ def completion():
e, conv = API4ConversationService.get_by_id(req["conversation_id"])
if not e:
return get_data_error_result(message="Conversation not found!")
- if "quote" not in req: req["quote"] = False
+ if "quote" not in req:
+ req["quote"] = False
msg = []
for m in req["messages"]:
@@ -197,7 +197,8 @@ def completion():
if m["role"] == "assistant" and not msg:
continue
msg.append(m)
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
+ if not msg[-1].get("id"):
+ msg[-1]["id"] = get_uuid()
message_id = msg[-1]["id"]
def fillin_conv(ans):
@@ -674,11 +675,13 @@ def completion_faq():
e, conv = API4ConversationService.get_by_id(req["conversation_id"])
if not e:
return get_data_error_result(message="Conversation not found!")
- if "quote" not in req: req["quote"] = True
+ if "quote" not in req:
+ req["quote"] = True
msg = []
msg.append({"role": "user", "content": req["word"]})
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
+ if not msg[-1].get("id"):
+ msg[-1]["id"] = get_uuid()
message_id = msg[-1]["id"]
def fillin_conv(ans):
diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py
index b21ca87dc2afdb2235e0b09030d145a60fe6ff47..0d307ed491d9a48815215b479a644b73047137e8 100644
--- a/api/apps/canvas_app.py
+++ b/api/apps/canvas_app.py
@@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-import logging
import json
import traceback
-from functools import partial
from flask import request, Response
from flask_login import login_required, current_user
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
@@ -60,7 +58,8 @@ def rm():
def save():
req = request.json
req["user_id"] = current_user.id
- if not isinstance(req["dsl"], str): req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
+ if not isinstance(req["dsl"], str):
+ req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
req["dsl"] = json.loads(req["dsl"])
if "id" not in req:
@@ -153,7 +152,8 @@ def run():
return resp
for answer in canvas.run(stream=False):
- if answer.get("running_status"): continue
+ if answer.get("running_status"):
+ continue
final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
if final_ans.get("reference"):
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index 1b812fd809a9d2489b72d154d7d08e16a585b0d9..7786684dbfd8e687d6eb9acd362bf0bcfa3f7332 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -237,7 +237,8 @@ def create():
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not e:
return get_data_error_result(message="Knowledgebase not found!")
- if kb.pagerank: d["pagerank_fea"] = kb.pagerank
+ if kb.pagerank:
+ d["pagerank_fea"] = kb.pagerank
embd_id = DocumentService.get_embd_id(req["doc_id"])
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index 5da16dc0374eaad88c83f54806c690c7ab5d4c14..91f4bc8525e5cf103de6b267301a99539977d1a9 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -281,10 +281,12 @@ def thumbup():
if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
if up_down:
msg["thumbup"] = True
- if "feedback" in msg: del msg["feedback"]
+ if "feedback" in msg:
+ del msg["feedback"]
else:
msg["thumbup"] = False
- if feedback: msg["feedback"] = feedback
+ if feedback:
+ msg["feedback"] = feedback
break
ConversationService.update_by_id(conv["id"], conv)
diff --git a/api/apps/dialog_app.py b/api/apps/dialog_app.py
index 32bd62a0cba3b4558bbd8c789c654663cb398bff..0accb04a6553dd94c75fdf95d0396aa6a67e01ed 100644
--- a/api/apps/dialog_app.py
+++ b/api/apps/dialog_app.py
@@ -37,10 +37,12 @@ def set_dialog():
top_n = req.get("top_n", 6)
top_k = req.get("top_k", 1024)
rerank_id = req.get("rerank_id", "")
- if not rerank_id: req["rerank_id"] = ""
+ if not rerank_id:
+ req["rerank_id"] = ""
similarity_threshold = req.get("similarity_threshold", 0.1)
vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
- if vector_similarity_weight is None: vector_similarity_weight = 0.3
+ if vector_similarity_weight is None:
+ vector_similarity_weight = 0.3
llm_setting = req.get("llm_setting", {})
default_prompt = {
"system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 35dd84ebdaa9a9f03e63355f437ded3f0facd476..deb2fbeadaa663b0ac4da31a87dd4612e7c5b8ec 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
-import json
import os.path
import pathlib
import re
@@ -90,7 +89,8 @@ def web_crawl():
raise LookupError("Can't find this knowledgebase!")
blob = html2pdf(url)
- if not blob: return server_error_response(ValueError("Download failure."))
+ if not blob:
+ return server_error_response(ValueError("Download failure."))
root_folder = FileService.get_root_folder(current_user.id)
pf_id = root_folder["id"]
@@ -290,7 +290,8 @@ def change_status():
def rm():
req = request.json
doc_ids = req["doc_id"]
- if isinstance(doc_ids, str): doc_ids = [doc_ids]
+ if isinstance(doc_ids, str):
+ doc_ids = [doc_ids]
for doc_id in doc_ids:
if not DocumentService.accessible4deletion(doc_id, current_user.id):
diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index 9c7c8dcc4f20f8c5e1ac1f0b3749b8ecd623bcc2..5b8ad87d5ae81012e1340429e7939dd95599c00d 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -351,8 +351,10 @@ def list_app():
llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
for o in objs:
- if not o.api_key: continue
- if o.llm_name + "@" + o.llm_factory in llm_set: continue
+ if not o.api_key:
+ continue
+ if o.llm_name + "@" + o.llm_factory in llm_set:
+ continue
llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
res = {}
diff --git a/api/apps/sdk/agent.py b/api/apps/sdk/agent.py
index 0ea4bfa00065c6cb0592005f1f3d86c8627a01a0..79cb5954cf14e9b9819eaad29f943b58dd0c1c23 100644
--- a/api/apps/sdk/agent.py
+++ b/api/apps/sdk/agent.py
@@ -14,7 +14,7 @@
# limitations under the License.
#
-from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
+from api.db.services.canvas_service import UserCanvasService
from api.utils.api_utils import get_error_data_result, token_required
from api.utils.api_utils import get_result
from flask import request
diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index 0132cd9944237535abdccdbfb05def965bea903b..2a2bacbe147133ee719c46d7944a98e7ddc9994c 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -41,7 +41,6 @@ from api.utils.api_utils import construct_json_result, get_parser_config
from rag.nlp import search
from rag.utils import rmSpace
from rag.utils.storage_factory import STORAGE_IMPL
-import os
MAXIMUM_OF_UPLOADING_FILES = 256
@@ -976,12 +975,12 @@ def add_chunk(tenant_id, dataset_id, document_id):
if not req.get("content"):
return get_error_data_result(message="`content` is required")
if "important_keywords" in req:
- if type(req["important_keywords"]) != list:
+ if not isinstance(req["important_keywords"], list):
return get_error_data_result(
"`important_keywords` is required to be a list"
)
if "questions" in req:
- if type(req["questions"]) != list:
+ if not isinstance(req["questions"], list):
return get_error_data_result(
"`questions` is required to be a list"
)
diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 3d9ea9d51e2d4a563356dc92b4add9303c7827ad..6fc031243266d21c6de38d528f6caf897c83cdf6 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -143,8 +143,10 @@ def completion(tenant_id, chat_id):
}
conv.message.append(question)
for m in conv.message:
- if m["role"] == "system": continue
- if m["role"] == "assistant" and not msg: continue
+ if m["role"] == "system":
+ continue
+ if m["role"] == "assistant" and not msg:
+ continue
msg.append(m)
message_id = msg[-1].get("id")
e, dia = DialogService.get_by_id(conv.dialog_id)
@@ -267,7 +269,8 @@ def agent_completion(tenant_id, agent_id):
if m["role"] == "assistant" and not msg:
continue
msg.append(m)
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
+ if not msg[-1].get("id"):
+ msg[-1]["id"] = get_uuid()
message_id = msg[-1]["id"]
stream = req.get("stream", True)
@@ -361,7 +364,8 @@ def agent_completion(tenant_id, agent_id):
return resp
for answer in canvas.run(stream=False):
- if answer.get("running_status"): continue
+ if answer.get("running_status"):
+ continue
final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
if final_ans.get("reference"):
diff --git a/api/apps/user_app.py b/api/apps/user_app.py
index cc0506316fb68bd0b5ca77e5d2a5cbc2da0c6540..a9ec7c1209d3234b870e7a8991844dd9cb06e2ae 100644
--- a/api/apps/user_app.py
+++ b/api/apps/user_app.py
@@ -330,7 +330,7 @@ def user_info_from_github(access_token):
headers=headers,
).json()
user_info["email"] = next(
- (email for email in email_info if email["primary"] == True), None
+ (email for email in email_info if email["primary"]), None
)["email"]
return user_info
diff --git a/api/db/db_models.py b/api/db/db_models.py
index bb3c97851eaca376d9e15aaf92bee761eb732df9..0c052ca18a4e87d704bc3053908585135b8f5097 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -130,7 +130,7 @@ def is_continuous_field(cls: typing.Type) -> bool:
for p in cls.__bases__:
if p in CONTINUOUS_FIELD_TYPE:
return True
- elif p != Field and p != object:
+ elif p is not Field and p is not object:
if is_continuous_field(p):
return True
else:
diff --git a/api/db/init_data.py b/api/db/init_data.py
index f1d468a2ad7e693ad1e741b215365fbd58161ea7..4817b05fdd4d5526399c05438fb2d2ceb8f64229 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -170,7 +170,7 @@ def add_graph_templates():
cnvs = json.load(open(os.path.join(dir, fnm), "r"))
try:
CanvasTemplateService.save(**cnvs)
- except:
+ except Exception:
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
except Exception:
logging.exception("Add graph templates error: ")
diff --git a/api/db/services/__init__.py b/api/db/services/__init__.py
index 2363e65df817919802e0c7595f319203eaae0fb3..964a7a17b28cc2a737f097562e6f071c072c1dd6 100644
--- a/api/db/services/__init__.py
+++ b/api/db/services/__init__.py
@@ -15,13 +15,14 @@
#
import pathlib
import re
-from .user_service import UserService
+from .user_service import UserService as UserService
def duplicate_name(query_func, **kwargs):
fnm = kwargs["name"]
objs = query_func(**kwargs)
- if not objs: return fnm
+ if not objs:
+ return fnm
ext = pathlib.Path(fnm).suffix #.jpg
nm = re.sub(r"%s$"%ext, "", fnm)
r = re.search(r"\(([0-9]+)\)$", nm)
@@ -31,8 +32,8 @@ def duplicate_name(query_func, **kwargs):
nm = re.sub(r"\([0-9]+\)$", "", nm)
c += 1
nm = f"{nm}({c})"
- if ext: nm += f"{ext}"
+ if ext:
+ nm += f"{ext}"
kwargs["name"] = nm
return duplicate_name(query_func, **kwargs)
-
diff --git a/api/db/services/api_service.py b/api/db/services/api_service.py
index 864e664329c1aac86998671e7c235c0600a15af0..640a22369355013e5158072bfd252478f6348b58 100644
--- a/api/db/services/api_service.py
+++ b/api/db/services/api_service.py
@@ -64,7 +64,8 @@ class API4ConversationService(CommonService):
@classmethod
@DB.connection_context()
def stats(cls, tenant_id, from_date, to_date, source=None):
- if len(to_date) == 10: to_date += " 23:59:59"
+ if len(to_date) == 10:
+ to_date += " 23:59:59"
return cls.model.select(
cls.model.create_date.truncate("day").alias("dt"),
peewee.fn.COUNT(
diff --git a/api/db/services/canvas_service.py b/api/db/services/canvas_service.py
index 23eb186934f880f7ff2961aad42a49a29107227d..0fac2f24850a39b506f30a3f585737bf43a680c1 100644
--- a/api/db/services/canvas_service.py
+++ b/api/db/services/canvas_service.py
@@ -13,9 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from datetime import datetime
-import peewee
-from api.db.db_models import DB, API4Conversation, APIToken, Dialog, CanvasTemplate, UserCanvas
+from api.db.db_models import DB, CanvasTemplate, UserCanvas
from api.db.services.common_service import CommonService
diff --git a/api/db/services/common_service.py b/api/db/services/common_service.py
index f0f52930c7fd848a7e8e052c7c2338dad62014aa..dcbe28cda637bb45194f63ab64d7695b53ea4583 100644
--- a/api/db/services/common_service.py
+++ b/api/db/services/common_service.py
@@ -115,7 +115,7 @@ class CommonService:
try:
obj = cls.model.query(id=pid)[0]
return True, obj
- except Exception as e:
+ except Exception:
return False, None
@classmethod
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index 9e9aa507186e07e6b2c68e8634c7b980edf148b1..1a63b7962235d38621b391b7833fe995f24a8373 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -106,15 +106,15 @@ def message_fit_in(msg, max_length=4000):
return c, msg
ll = num_tokens_from_string(msg_[0]["content"])
- l = num_tokens_from_string(msg_[-1]["content"])
- if ll / (ll + l) > 0.8:
+ ll2 = num_tokens_from_string(msg_[-1]["content"])
+ if ll / (ll + ll2) > 0.8:
m = msg_[0]["content"]
- m = encoder.decode(encoder.encode(m)[:max_length - l])
+ m = encoder.decode(encoder.encode(m)[:max_length - ll2])
msg[0]["content"] = m
return max_length, msg
m = msg_[1]["content"]
- m = encoder.decode(encoder.encode(m)[:max_length - l])
+ m = encoder.decode(encoder.encode(m)[:max_length - ll2])
msg[1]["content"] = m
return max_length, msg
@@ -257,7 +257,8 @@ def chat(dialog, messages, stream=True, **kwargs):
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
recall_docs = [
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
- if not recall_docs: recall_docs = kbinfos["doc_aggs"]
+ if not recall_docs:
+ recall_docs = kbinfos["doc_aggs"]
kbinfos["doc_aggs"] = recall_docs
refs = deepcopy(kbinfos)
@@ -433,13 +434,15 @@ def relevant(tenant_id, llm_id, question, contents: list):
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
No other words needed except 'yes' or 'no'.
"""
- if not contents:return False
+ if not contents:
+ return False
contents = "Documents: \n" + " - ".join(contents)
contents = f"Question: {question}\n" + contents
if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
- if ans.lower().find("yes") >= 0: return True
+ if ans.lower().find("yes") >= 0:
+ return True
return False
@@ -481,8 +484,10 @@ Requirements:
]
_, msg = message_fit_in(msg, chat_mdl.max_length)
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
- if isinstance(kwd, tuple): kwd = kwd[0]
- if kwd.find("**ERROR**") >=0: return ""
+ if isinstance(kwd, tuple):
+ kwd = kwd[0]
+ if kwd.find("**ERROR**") >=0:
+ return ""
return kwd
@@ -508,8 +513,10 @@ Requirements:
]
_, msg = message_fit_in(msg, chat_mdl.max_length)
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
- if isinstance(kwd, tuple): kwd = kwd[0]
- if kwd.find("**ERROR**") >= 0: return ""
+ if isinstance(kwd, tuple):
+ kwd = kwd[0]
+ if kwd.find("**ERROR**") >= 0:
+ return ""
return kwd
@@ -520,7 +527,8 @@ def full_question(tenant_id, llm_id, messages):
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
conv = []
for m in messages:
- if m["role"] not in ["user", "assistant"]: continue
+ if m["role"] not in ["user", "assistant"]:
+ continue
conv.append("{}: {}".format(m["role"].upper(), m["content"]))
conv = "\n".join(conv)
today = datetime.date.today().isoformat()
@@ -581,7 +589,8 @@ Output: What's the weather in Rochester on {tomorrow}?
def tts(tts_mdl, text):
- if not tts_mdl or not text: return
+ if not tts_mdl or not text:
+ return
bin = b""
for chunk in tts_mdl.tts(text):
bin += chunk
@@ -641,7 +650,8 @@ def ask(question, kb_ids, tenant_id):
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
recall_docs = [
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
- if not recall_docs: recall_docs = kbinfos["doc_aggs"]
+ if not recall_docs:
+ recall_docs = kbinfos["doc_aggs"]
kbinfos["doc_aggs"] = recall_docs
refs = deepcopy(kbinfos)
for c in refs["chunks"]:
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 9feb69da01b8c1395ab7dcda792e8c732fe7c8d1..aea4931ebf300c2ea61d36c9d27e7957a197c83a 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -532,7 +532,8 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
try:
mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
ensure_ascii=False, indent=2)
- if len(mind_map) < 32: raise Exception("Few content: " + mind_map)
+ if len(mind_map) < 32:
+ raise Exception("Few content: " + mind_map)
cks.append({
"id": get_uuid(),
"doc_id": doc_id,
diff --git a/api/db/services/file2document_service.py b/api/db/services/file2document_service.py
index e04ed190cb73e255456dfb3f8d254e760178140e..f3f587e465440778bafb5ecfec984fb87cfc6c48 100644
--- a/api/db/services/file2document_service.py
+++ b/api/db/services/file2document_service.py
@@ -20,7 +20,7 @@ from api.db.db_models import DB
from api.db.db_models import File, File2Document
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
-from api.utils import current_timestamp, datetime_format, get_uuid
+from api.utils import current_timestamp, datetime_format
class File2DocumentService(CommonService):
@@ -63,7 +63,7 @@ class File2DocumentService(CommonService):
def update_by_file_id(cls, file_id, obj):
obj["update_time"] = current_timestamp()
obj["update_date"] = datetime_format(datetime.now())
- num = cls.model.update(obj).where(cls.model.id == file_id).execute()
+ cls.model.update(obj).where(cls.model.id == file_id).execute()
e, obj = cls.get_by_id(cls.model.id)
return obj
diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py
index 2602bf3764b4ec36376b71fd322a21270108b756..be225aeeb907a8ae37958b3b6b3bbd9adecb8ca0 100644
--- a/api/db/services/file_service.py
+++ b/api/db/services/file_service.py
@@ -85,7 +85,8 @@ class FileService(CommonService):
.join(Document, on=(File2Document.document_id == Document.id))
.join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
.where(cls.model.id == file_id))
- if not kbs: return []
+ if not kbs:
+ return []
kbs_info_list = []
for kb in list(kbs.dicts()):
kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
@@ -304,7 +305,8 @@ class FileService(CommonService):
@classmethod
@DB.connection_context()
def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
- for _ in File2DocumentService.get_by_document_id(doc["id"]): return
+ for _ in File2DocumentService.get_by_document_id(doc["id"]):
+ return
file = {
"id": get_uuid(),
"parent_id": kb_folder_id,
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index 16fd42ca161869843acaee9ad42c9453306f2b74..2d47a93effd9411237bbc1fe872130889653ad41 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -107,7 +107,8 @@ class TenantLLMService(CommonService):
model_config = cls.get_api_key(tenant_id, mdlnm)
mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
- if model_config: model_config = model_config.to_dict()
+ if model_config:
+ model_config = model_config.to_dict()
if not model_config:
if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py
index b55621fe8815b6e5d2d8a9d00086283bb71e6395..424a571ee5731f565df3eeb9c70c39767f13c55a 100644
--- a/api/db/services/task_service.py
+++ b/api/db/services/task_service.py
@@ -57,28 +57,33 @@ class TaskService(CommonService):
Tenant.img2txt_id,
Tenant.asr_id,
Tenant.llm_id,
- cls.model.update_time]
- docs = cls.model.select(*fields) \
- .join(Document, on=(cls.model.doc_id == Document.id)) \
- .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
- .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
+ cls.model.update_time,
+ ]
+ docs = (
+ cls.model.select(*fields)
+ .join(Document, on=(cls.model.doc_id == Document.id))
+ .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
+ .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
.where(cls.model.id == task_id)
+ )
docs = list(docs.dicts())
- if not docs: return None
+ if not docs:
+ return None
msg = "\nTask has been received."
- prog = random.random() / 10.
+ prog = random.random() / 10.0
if docs[0]["retry_count"] >= 3:
msg = "\nERROR: Task is abandoned after 3 times attempts."
prog = -1
- cls.model.update(progress_msg=cls.model.progress_msg + msg,
- progress=prog,
- retry_count=docs[0]["retry_count"]+1
- ).where(
- cls.model.id == docs[0]["id"]).execute()
+ cls.model.update(
+ progress_msg=cls.model.progress_msg + msg,
+ progress=prog,
+ retry_count=docs[0]["retry_count"] + 1,
+ ).where(cls.model.id == docs[0]["id"]).execute()
- if docs[0]["retry_count"] >= 3: return None
+ if docs[0]["retry_count"] >= 3:
+ return None
return docs[0]
@@ -86,21 +91,44 @@ class TaskService(CommonService):
@DB.connection_context()
def get_ongoing_doc_name(cls):
with DB.lock("get_task", -1):
- docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
- .join(Document, on=(cls.model.doc_id == Document.id)) \
- .join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
- .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
+ docs = (
+ cls.model.select(
+ *[Document.id, Document.kb_id, Document.location, File.parent_id]
+ )
+ .join(Document, on=(cls.model.doc_id == Document.id))
+ .join(
+ File2Document,
+ on=(File2Document.document_id == Document.id),
+ join_type=JOIN.LEFT_OUTER,
+ )
+ .join(
+ File,
+ on=(File2Document.file_id == File.id),
+ join_type=JOIN.LEFT_OUTER,
+ )
.where(
Document.status == StatusEnum.VALID.value,
Document.run == TaskStatus.RUNNING.value,
~(Document.type == FileType.VIRTUAL.value),
cls.model.progress < 1,
- cls.model.create_time >= current_timestamp() - 1000 * 600
+ cls.model.create_time >= current_timestamp() - 1000 * 600,
)
+ )
docs = list(docs.dicts())
- if not docs: return []
-
- return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
+ if not docs:
+ return []
+
+ return list(
+ set(
+ [
+ (
+ d["parent_id"] if d["parent_id"] else d["kb_id"],
+ d["location"],
+ )
+ for d in docs
+ ]
+ )
+ )
@classmethod
@DB.connection_context()
@@ -118,28 +146,30 @@ class TaskService(CommonService):
def update_progress(cls, id, info):
if os.environ.get("MACOS"):
if info["progress_msg"]:
- cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
- cls.model.id == id).execute()
+ cls.model.update(
+ progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
+ ).where(cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
- cls.model.id == id).execute()
+ cls.model.id == id
+ ).execute()
return
with DB.lock("update_progress", -1):
if info["progress_msg"]:
- cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
- cls.model.id == id).execute()
+ cls.model.update(
+ progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
+ ).where(cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
- cls.model.id == id).execute()
+ cls.model.id == id
+ ).execute()
def queue_tasks(doc: dict, bucket: str, name: str):
def new_task():
- return {
- "id": get_uuid(),
- "doc_id": doc["id"]
- }
+ return {"id": get_uuid(), "doc_id": doc["id"]}
+
tsks = []
if doc["type"] == FileType.PDF.value:
@@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str):
if doc["parser_id"] == "paper":
page_size = doc["parser_config"].get("task_page_size", 22)
if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
- page_size = 10 ** 9
- page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
+ page_size = 10**9
+ page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)]
for s, e in page_ranges:
s -= 1
s = max(0, s)
@@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str):
DocumentService.begin2parse(doc["id"])
for t in tsks:
- assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
+ assert REDIS_CONN.queue_product(
+ SVR_QUEUE_NAME, message=t
+ ), "Can't access Redis. Please check the Redis' status."
diff --git a/api/db/services/user_service.py b/api/db/services/user_service.py
index 49a1d7f9bad9e7382d077756e6ca963acf4cb195..44cafb87ace9c7e0eca693dc49c125dddf926f21 100644
--- a/api/db/services/user_service.py
+++ b/api/db/services/user_service.py
@@ -22,7 +22,7 @@ from api.db import UserTenantRole
from api.db.db_models import DB, UserTenant
from api.db.db_models import User, Tenant
from api.db.services.common_service import CommonService
-from api.utils import get_uuid, get_format_time, current_timestamp, datetime_format
+from api.utils import get_uuid, current_timestamp, datetime_format
from api.db import StatusEnum
diff --git a/api/ragflow_server.py b/api/ragflow_server.py
index 0d7cd6a366723a3ec0e0ea08a36268b213a1289a..713ba1881e68f2347d66d5bb8854fa33644e9ea2 100644
--- a/api/ragflow_server.py
+++ b/api/ragflow_server.py
@@ -21,10 +21,7 @@
import logging
import os
from api.utils.log_utils import initRootLogger
-LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
-initRootLogger("ragflow_server", LOG_LEVELS)
-import os
import signal
import sys
import time
@@ -44,6 +41,9 @@ from api.versions import get_ragflow_version
from api.utils import show_configs
from rag.settings import print_rag_settings
+LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
+initRootLogger("ragflow_server", LOG_LEVELS)
+
def update_progress():
while True:
diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index e20b85d29f859a87adc0af1d03cd752916a9b00f..635497d07e70c71fa1786789a92f17344ddf58c7 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -36,7 +36,6 @@ from werkzeug.http import HTTP_STATUS_CODES
from api.db.db_models import APIToken
from api import settings
-from api import settings
from api.utils import CustomJSONEncoder, get_uuid
from api.utils import json_dumps
from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
diff --git a/api/validation.py b/api/validation.py
index 39d506a8f2413cd4f342b9fef12198d74b46790f..b552b3375a4a86365844d13e97b1210e92deb9e4 100644
--- a/api/validation.py
+++ b/api/validation.py
@@ -45,5 +45,5 @@ try:
pool = Pool(processes=1)
thread = pool.apply_async(download_nltk_data)
binary = thread.get(timeout=60)
-except Exception as e:
+except Exception:
print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
diff --git a/deepdoc/parser/__init__.py b/deepdoc/parser/__init__.py
index 67e5b5a8dcb76d62e2e22494964be805211ff557..2a62a989468b987a227ead60383964af3bb24d7c 100644
--- a/deepdoc/parser/__init__.py
+++ b/deepdoc/parser/__init__.py
@@ -18,4 +18,16 @@ from .ppt_parser import RAGFlowPptParser as PptParser
from .html_parser import RAGFlowHtmlParser as HtmlParser
from .json_parser import RAGFlowJsonParser as JsonParser
from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
-from .txt_parser import RAGFlowTxtParser as TxtParser
\ No newline at end of file
+from .txt_parser import RAGFlowTxtParser as TxtParser
+
+__all__ = [
+ "PdfParser",
+ "PlainParser",
+ "DocxParser",
+ "ExcelParser",
+ "PptParser",
+ "HtmlParser",
+ "JsonParser",
+ "MarkdownParser",
+ "TxtParser",
+]
\ No newline at end of file
diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py
index 4bb509061245b78d9ba68cadccc2f6dcade02f51..1d23978b70714808f25d93cc8e1d811abc922fae 100644
--- a/deepdoc/parser/excel_parser.py
+++ b/deepdoc/parser/excel_parser.py
@@ -29,7 +29,8 @@ class RAGFlowExcelParser:
for sheetname in wb.sheetnames:
ws = wb[sheetname]
rows = list(ws.rows)
- if not rows: continue
+ if not rows:
+ continue
tb_rows_0 = "
"
for t in list(rows[0]):
@@ -40,7 +41,9 @@ class RAGFlowExcelParser:
tb = ""
tb += f"{sheetname}"
tb += tb_rows_0
- for r in list(rows[1 + chunk_i * chunk_rows:1 + (chunk_i + 1) * chunk_rows]):
+ for r in list(
+ rows[1 + chunk_i * chunk_rows : 1 + (chunk_i + 1) * chunk_rows]
+ ):
tb += ""
for i, c in enumerate(r):
if c.value is None:
@@ -62,20 +65,21 @@ class RAGFlowExcelParser:
for sheetname in wb.sheetnames:
ws = wb[sheetname]
rows = list(ws.rows)
- if not rows:continue
+ if not rows:
+ continue
ti = list(rows[0])
for r in list(rows[1:]):
- l = []
+ fields = []
for i, c in enumerate(r):
if not c.value:
continue
t = str(ti[i].value) if i < len(ti) else ""
t += (":" if t else "") + str(c.value)
- l.append(t)
- l = "; ".join(l)
+ fields.append(t)
+ line = "; ".join(fields)
if sheetname.lower().find("sheet") < 0:
- l += " ——" + sheetname
- res.append(l)
+ line += " ——" + sheetname
+ res.append(line)
return res
@staticmethod
diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py
index e02aaa1f3e51210e733ec57b8d0d433366c0345b..973dbbd4644ddb22c873ebd8405da4c9b7eb2637 100644
--- a/deepdoc/parser/html_parser.py
+++ b/deepdoc/parser/html_parser.py
@@ -36,7 +36,7 @@ class RAGFlowHtmlParser:
@classmethod
def parser_txt(cls, txt):
- if type(txt) != str:
+ if not isinstance(txt, str):
raise TypeError("txt type should be str!")
html_doc = readability.Document(txt)
title = html_doc.title()
diff --git a/deepdoc/parser/json_parser.py b/deepdoc/parser/json_parser.py
index 1dd620d44d38822a95a3068199933803cb7dd6d4..08ddc89acf07a094b3d08cd2d6d93f74079a02cb 100644
--- a/deepdoc/parser/json_parser.py
+++ b/deepdoc/parser/json_parser.py
@@ -22,7 +22,7 @@ class RAGFlowJsonParser:
txt = binary.decode(encoding, errors="ignore")
json_data = json.loads(txt)
chunks = self.split_json(json_data, True)
- sections = [json.dumps(l, ensure_ascii=False) for l in chunks if l]
+ sections = [json.dumps(line, ensure_ascii=False) for line in chunks if line]
return sections
@staticmethod
diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py
index 5ffecde4b9b1ef58fa6ea884aebd15e73b4e3108..82fab32d24e5f30e2d84f964ed792ca843d15626 100644
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -752,7 +752,7 @@ class RAGFlowPdfParser:
"x1": np.max([b["x1"] for b in bxs]),
"bottom": np.max([b["bottom"] for b in bxs]) - ht
}
- louts = [l for l in self.page_layout[pn] if l["type"] == ltype]
+ louts = [layout for layout in self.page_layout[pn] if layout["type"] == ltype]
ii = Recognizer.find_overlapped(b, louts, naive=True)
if ii is not None:
b = louts[ii]
@@ -763,7 +763,8 @@ class RAGFlowPdfParser:
"layoutno", "")))
left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
- if right < left: right = left + 1
+ if right < left:
+ right = left + 1
poss.append((pn + self.page_from, left, right, top, bott))
return self.page_images[pn] \
.crop((left * ZM, top * ZM,
@@ -845,7 +846,8 @@ class RAGFlowPdfParser:
top = bx["top"] - self.page_cum_height[pn[0] - 1]
bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
page_images_cnt = len(self.page_images)
- if pn[-1] - 1 >= page_images_cnt: return ""
+ if pn[-1] - 1 >= page_images_cnt:
+ return ""
while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
bott -= self.page_images[pn[-1] - 1].size[1] / ZM
pn.append(pn[-1] + 1)
@@ -889,7 +891,6 @@ class RAGFlowPdfParser:
nonlocal mh, pw, lines, widths
lines.append(line)
widths.append(width(line))
- width_mean = np.mean(widths)
mmj = self.proj_match(
line["text"]) or line.get(
"layout_type",
@@ -994,7 +995,7 @@ class RAGFlowPdfParser:
else:
self.is_english = False
- st = timer()
+ # st = timer()
for i, img in enumerate(self.page_images_x2):
chars = self.page_chars[i] if not self.is_english else []
self.mean_height.append(
@@ -1028,8 +1029,8 @@ class RAGFlowPdfParser:
self.page_cum_height = np.cumsum(self.page_cum_height)
assert len(self.page_cum_height) == len(self.page_images) + 1
- if len(self.boxes) == 0 and zoomin < 9: self.__images__(fnm, zoomin * 3, page_from,
- page_to, callback)
+ if len(self.boxes) == 0 and zoomin < 9:
+ self.__images__(fnm, zoomin * 3, page_from, page_to, callback)
def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
self.__images__(fnm, zoomin)
@@ -1168,7 +1169,7 @@ class PlainParser(object):
if not self.outlines:
logging.warning("Miss outlines")
- return [(l, "") for l in lines], []
+ return [(line, "") for line in lines], []
def crop(self, ck, need_position):
raise NotImplementedError
diff --git a/deepdoc/parser/resume/__init__.py b/deepdoc/parser/resume/__init__.py
index fab6f7e716eb0d5ac2aebda5690c6f704f85da74..1038bf2b7b128a89cff113f3b75fea4b35a8d89f 100644
--- a/deepdoc/parser/resume/__init__.py
+++ b/deepdoc/parser/resume/__init__.py
@@ -15,21 +15,42 @@ import datetime
def refactor(cv):
- for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]:
- if n in cv and cv[n] is not None: del cv[n]
+ for n in [
+ "raw_txt",
+ "parser_name",
+ "inference",
+ "ori_text",
+ "use_time",
+ "time_stat",
+ ]:
+ if n in cv and cv[n] is not None:
+ del cv[n]
cv["is_deleted"] = 0
- if "basic" not in cv: cv["basic"] = {}
- if cv["basic"].get("photo2"): del cv["basic"]["photo2"]
+ if "basic" not in cv:
+ cv["basic"] = {}
+ if cv["basic"].get("photo2"):
+ del cv["basic"]["photo2"]
- for n in ["education", "work", "certificate", "project", "language", "skill", "training"]:
- if n not in cv or cv[n] is None: continue
- if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()]
- if type(cv[n]) != type([]):
+ for n in [
+ "education",
+ "work",
+ "certificate",
+ "project",
+ "language",
+ "skill",
+ "training",
+ ]:
+ if n not in cv or cv[n] is None:
+ continue
+ if isinstance(cv[n], dict):
+ cv[n] = [v for _, v in cv[n].items()]
+ if not isinstance(cv[n], list):
del cv[n]
continue
vv = []
for v in cv[n]:
- if "external" in v and v["external"] is not None: del v["external"]
+ if "external" in v and v["external"] is not None:
+ del v["external"]
vv.append(v)
cv[n] = {str(i): vv[i] for i in range(len(vv))}
@@ -42,24 +63,44 @@ def refactor(cv):
cv["basic"][t] = cv["basic"][n]
del cv["basic"][n]
- work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", ""))
- edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", ""))
+ work = sorted(
+ [v for _, v in cv.get("work", {}).items()],
+ key=lambda x: x.get("start_time", ""),
+ )
+ edu = sorted(
+ [v for _, v in cv.get("education", {}).items()],
+ key=lambda x: x.get("start_time", ""),
+ )
if work:
cv["basic"]["work_start_time"] = work[0].get("start_time", "")
- cv["basic"]["management_experience"] = 'Y' if any(
- [w.get("management_experience", '') == 'Y' for w in work]) else 'N'
+ cv["basic"]["management_experience"] = (
+ "Y"
+ if any([w.get("management_experience", "") == "Y" for w in work])
+ else "N"
+ )
cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
- for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities",
- "corporation_type", "scale", "corporation_name"]:
+ for n in [
+ "annual_salary_from",
+ "annual_salary_to",
+ "industry_name",
+ "position_name",
+ "responsibilities",
+ "corporation_type",
+ "scale",
+ "corporation_name",
+ ]:
cv["basic"][n] = work[-1].get(n, "")
if edu:
for n in ["school_name", "discipline_name"]:
- if n in edu[-1]: cv["basic"][n] = edu[-1][n]
+ if n in edu[-1]:
+ cv["basic"][n] = edu[-1][n]
cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- if "contact" not in cv: cv["contact"] = {}
- if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "")
- return cv
\ No newline at end of file
+ if "contact" not in cv:
+ cv["contact"] = {}
+ if not cv["contact"].get("name"):
+ cv["contact"]["name"] = cv["basic"].get("name", "")
+ return cv
diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py
index 142b0f5e492a2d941af8b52918f661641929a90b..6d0b293de4a911b9d311ac3ab945947f8abf803c 100644
--- a/deepdoc/parser/resume/entities/corporations.py
+++ b/deepdoc/parser/resume/entities/corporations.py
@@ -21,13 +21,18 @@ from . import regions
current_file_path = os.path.dirname(os.path.abspath(__file__))
-GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
+GOODS = pd.read_csv(
+ os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0
+).fillna(0)
GOODS["cid"] = GOODS["cid"].astype(str)
GOODS = GOODS.set_index(["cid"])
-CORP_TKS = json.load(open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r"))
+CORP_TKS = json.load(
+ open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r")
+)
GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r"))
CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r"))
+
def baike(cid, default_v=0):
global GOODS
try:
@@ -39,27 +44,41 @@ def baike(cid, default_v=0):
def corpNorm(nm, add_region=True):
global CORP_TKS
- if not nm or type(nm)!=type(""):return ""
+ if not nm or not isinstance(nm, str):
+ return ""
nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
nm = re.sub(r"&", "&", nm)
nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
- nm = re.sub(r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE)
- nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE)
- if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm
+ nm = re.sub(
+ r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE
+ )
+ nm = re.sub(
+ r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$",
+ "",
+ nm,
+ 10000,
+ re.IGNORECASE,
+ )
+ if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])):
+ return nm
tks = rag_tokenizer.tokenize(nm).split()
- reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
+ reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
nm = ""
for t in tks:
- if regions.isName(t) or t in CORP_TKS:continue
- if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):nm += " "
+ if regions.isName(t) or t in CORP_TKS:
+ continue
+ if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):
+ nm += " "
nm += t
r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
- if r:nm = r.group(1)
+ if r:
+ nm = r.group(1)
r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
- if r:nm = r.group(1)
- return nm.strip() + (("" if not reg else "(%s)"%reg[0]) if add_region else "")
+ if r:
+ nm = r.group(1)
+ return nm.strip() + (("" if not reg else "(%s)" % reg[0]) if add_region else "")
def rmNoise(n):
@@ -67,33 +86,40 @@ def rmNoise(n):
n = re.sub(r"[,. &()()]+", "", n)
return n
+
GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
-for c,v in CORP_TAG.items():
+for c, v in CORP_TAG.items():
cc = corpNorm(rmNoise(c), False)
if not cc:
logging.debug(c)
-CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
+CORP_TAG = {corpNorm(rmNoise(c), False): v for c, v in CORP_TAG.items()}
+
def is_good(nm):
global GOOD_CORP
- if nm.find("外派")>=0:return False
+ if nm.find("外派") >= 0:
+ return False
nm = rmNoise(nm)
nm = corpNorm(nm, False)
for n in GOOD_CORP:
if re.match(r"[0-9a-zA-Z]+$", n):
- if n == nm: return True
- elif nm.find(n)>=0:return True
+ if n == nm:
+ return True
+ elif nm.find(n) >= 0:
+ return True
return False
+
def corp_tag(nm):
global CORP_TAG
nm = rmNoise(nm)
nm = corpNorm(nm, False)
for n in CORP_TAG.keys():
if re.match(r"[0-9a-zA-Z., ]+$", n):
- if n == nm: return CORP_TAG[n]
- elif nm.find(n)>=0:
- if len(n)<3 and len(nm)/len(n)>=2:continue
+ if n == nm:
+ return CORP_TAG[n]
+ elif nm.find(n) >= 0:
+ if len(n) < 3 and len(nm) / len(n) >= 2:
+ continue
return CORP_TAG[n]
return []
-
diff --git a/deepdoc/parser/resume/entities/degrees.py b/deepdoc/parser/resume/entities/degrees.py
index dc2d5bc170cd171950a9ba8d3e77e88b94af24ce..47a90b58211a7ad1aea20ac0f4cf6c58145b8c22 100644
--- a/deepdoc/parser/resume/entities/degrees.py
+++ b/deepdoc/parser/resume/entities/degrees.py
@@ -11,27 +11,31 @@
# limitations under the License.
#
-TBL = {"94":"EMBA",
-"6":"MBA",
-"95":"MPA",
-"92":"专升本",
-"4":"专科",
-"90":"中专",
-"91":"中技",
-"86":"初中",
-"3":"博士",
-"10":"博士后",
-"1":"本科",
-"2":"硕士",
-"87":"职高",
-"89":"高中"
+TBL = {
+ "94": "EMBA",
+ "6": "MBA",
+ "95": "MPA",
+ "92": "专升本",
+ "4": "专科",
+ "90": "中专",
+ "91": "中技",
+ "86": "初中",
+ "3": "博士",
+ "10": "博士后",
+ "1": "本科",
+ "2": "硕士",
+ "87": "职高",
+ "89": "高中",
}
-TBL_ = {v:k for k,v in TBL.items()}
+TBL_ = {v: k for k, v in TBL.items()}
+
def get_name(id):
return TBL.get(str(id), "")
+
def get_id(nm):
- if not nm:return ""
+ if not nm:
+ return ""
return TBL_.get(nm.upper().strip(), "")
diff --git a/deepdoc/parser/resume/entities/industries.py b/deepdoc/parser/resume/entities/industries.py
index 9eeb10e55f5728125b281037cae0917d0c3a9c2e..4768ceb50e8995b6c2e69bf011a57663ac41445b 100644
--- a/deepdoc/parser/resume/entities/industries.py
+++ b/deepdoc/parser/resume/entities/industries.py
@@ -11,694 +11,699 @@
# limitations under the License.
#
-TBL = {"1":{"name":"IT/通信/电子","parent":"0"},
-"2":{"name":"互联网","parent":"0"},
-"3":{"name":"电子商务","parent":"2"},
-"4":{"name":"互联网金融","parent":"2"},
-"5":{"name":"网络游戏","parent":"2"},
-"6":{"name":"社交网络平台","parent":"2"},
-"7":{"name":"视频音乐","parent":"2"},
-"9":{"name":"安全","parent":"2"},
-"10":{"name":"云计算","parent":"2"},
-"12":{"name":"工具类客户端应用","parent":"2"},
-"13":{"name":"互联网广告","parent":"2"},
-"14":{"name":"企业互联网服务","parent":"2"},
-"16":{"name":"在线教育","parent":"2"},
-"17":{"name":"在线医疗","parent":"2"},
-"19":{"name":"B2B","parent":"3"},
-"20":{"name":"B2C","parent":"3"},
-"21":{"name":"C2C","parent":"3"},
-"22":{"name":"生活信息本地化","parent":"3"},
-"23":{"name":"在线旅游","parent":"2"},
-"24":{"name":"第三方支付","parent":"4"},
-"26":{"name":"客户端游戏","parent":"5"},
-"27":{"name":"网页游戏","parent":"5"},
-"28":{"name":"手机游戏","parent":"5"},
-"29":{"name":"微博","parent":"6"},
-"30":{"name":"社交网站","parent":"6"},
-"31":{"name":"在线视频","parent":"7"},
-"32":{"name":"在线音乐","parent":"7"},
-"35":{"name":"企业安全","parent":"9"},
-"36":{"name":"个人安全","parent":"9"},
-"37":{"name":"企业级云服务","parent":"10"},
-"38":{"name":"个人级云服务","parent":"10"},
-"43":{"name":"输入法","parent":"12"},
-"44":{"name":"浏览器","parent":"12"},
-"45":{"name":"词典","parent":"12"},
-"46":{"name":"播放器","parent":"12"},
-"47":{"name":"下载器","parent":"12"},
-"48":{"name":"IM","parent":"12"},
-"49":{"name":"广告服务","parent":"13"},
-"50":{"name":"第三方广告网络平台","parent":"13"},
-"51":{"name":"媒体代理","parent":"13"},
-"52":{"name":"创意代理","parent":"13"},
-"53":{"name":"IT-综合","parent":"1"},
-"71":{"name":"团购","parent":"3"},
-"72":{"name":"地图","parent":"2"},
-"73":{"name":"数据存储","parent":"2"},
-"414":{"name":"计算机软件","parent":"1"},
-"415":{"name":"计算机硬件","parent":"1"},
-"416":{"name":"计算机服务(系统、数据服务、维修)","parent":"1"},
-"417":{"name":"通信/电信/网络设备","parent":"1"},
-"418":{"name":"通信/电信运营、增值服务","parent":"1"},
-"419":{"name":"电子技术/半导体/集成电路","parent":"1"},
-"472":{"name":"P2P网贷","parent":"4"},
-"473":{"name":"互联网理财","parent":"4"},
-"474":{"name":"婚恋","parent":"6"},
-"476":{"name":"虚拟化","parent":"10"},
-"477":{"name":"邮箱","parent":"12"},
-"478":{"name":"商业智能","parent":"14"},
-"479":{"name":"企业建站","parent":"14"},
-"480":{"name":"安防","parent":"14"},
-"481":{"name":"网络营销","parent":"2"},
-"487":{"name":"智能终端","parent":"2"},
-"488":{"name":"移动互联网","parent":"2"},
-"489":{"name":"数字城市","parent":"2"},
-"490":{"name":"大数据","parent":"2"},
-"491":{"name":"互联网人力资源","parent":"2"},
-"492":{"name":"舆情监控","parent":"2"},
-"493":{"name":"移动营销","parent":"481"},
-"494":{"name":"微博营销","parent":"481"},
-"495":{"name":"精准营销","parent":"481"},
-"496":{"name":"海外营销","parent":"481"},
-"497":{"name":"微信营销","parent":"481"},
-"498":{"name":"智能手机","parent":"487"},
-"499":{"name":"可穿戴设备","parent":"487"},
-"500":{"name":"智能电视","parent":"487"},
-"501":{"name":"WAP","parent":"488"},
-"502":{"name":"物联网","parent":"489"},
-"503":{"name":"O2O","parent":"489"},
-"504":{"name":"数字出版","parent":"489"},
-"505":{"name":"搜索","parent":"2"},
-"506":{"name":"垂直搜索","parent":"505"},
-"507":{"name":"无线搜索","parent":"505"},
-"508":{"name":"网页搜索","parent":"505"},
-"509":{"name":"网址导航","parent":"2"},
-"510":{"name":"门户","parent":"2"},
-"511":{"name":"网络文学","parent":"2"},
-"512":{"name":"自媒体","parent":"2"},
-"513":{"name":"金融","parent":"0"},
-"514":{"name":"建筑与房地产","parent":"0"},
-"515":{"name":"专业服务","parent":"0"},
-"516":{"name":"教育培训","parent":"0"},
-"517":{"name":"文化传媒","parent":"0"},
-"518":{"name":"消费品","parent":"0"},
-"519":{"name":"工业","parent":"0"},
-"520":{"name":"交通物流","parent":"0"},
-"521":{"name":"贸易","parent":"0"},
-"522":{"name":"医药","parent":"0"},
-"523":{"name":"医疗器械","parent":"522"},
-"524":{"name":"保健品","parent":"518"},
-"525":{"name":"服务业","parent":"0"},
-"526":{"name":"能源/矿产/环保","parent":"0"},
-"527":{"name":"化工","parent":"0"},
-"528":{"name":"政府","parent":"0"},
-"529":{"name":"公共事业","parent":"0"},
-"530":{"name":"非盈利机构","parent":"0"},
-"531":{"name":"农业","parent":"1131"},
-"532":{"name":"林业","parent":"1131"},
-"533":{"name":"畜牧业","parent":"1131"},
-"534":{"name":"渔业","parent":"1131"},
-"535":{"name":"学术科研","parent":"0"},
-"536":{"name":"零售","parent":"0"},
-"537":{"name":"银行","parent":"513"},
-"538":{"name":"保险","parent":"513"},
-"539":{"name":"证券","parent":"513"},
-"540":{"name":"基金","parent":"513"},
-"541":{"name":"信托","parent":"513"},
-"542":{"name":"担保","parent":"513"},
-"543":{"name":"典当","parent":"513"},
-"544":{"name":"拍卖","parent":"513"},
-"545":{"name":"投资/融资","parent":"513"},
-"546":{"name":"期货","parent":"513"},
-"547":{"name":"房地产开发","parent":"514"},
-"548":{"name":"工程施工","parent":"514"},
-"549":{"name":"建筑设计","parent":"514"},
-"550":{"name":"房地产代理","parent":"514"},
-"551":{"name":"物业管理","parent":"514"},
-"552":{"name":"室内设计","parent":"514"},
-"553":{"name":"装修装潢","parent":"514"},
-"554":{"name":"市政工程","parent":"514"},
-"555":{"name":"工程造价","parent":"514"},
-"556":{"name":"工程监理","parent":"514"},
-"557":{"name":"环境工程","parent":"514"},
-"558":{"name":"园林景观","parent":"514"},
-"559":{"name":"法律","parent":"515"},
-"560":{"name":"人力资源","parent":"515"},
-"561":{"name":"会计","parent":"1125"},
-"562":{"name":"审计","parent":"515"},
-"563":{"name":"检测认证","parent":"515"},
-"565":{"name":"翻译","parent":"515"},
-"566":{"name":"中介","parent":"515"},
-"567":{"name":"咨询","parent":"515"},
-"568":{"name":"外包服务","parent":"515"},
-"569":{"name":"家教","parent":"516"},
-"570":{"name":"早教","parent":"516"},
-"571":{"name":"职业技能培训","parent":"516"},
-"572":{"name":"外语培训","parent":"516"},
-"573":{"name":"设计培训","parent":"516"},
-"574":{"name":"IT培训","parent":"516"},
-"575":{"name":"文艺体育培训","parent":"516"},
-"576":{"name":"学历教育","parent":"516"},
-"577":{"name":"管理培训","parent":"516"},
-"578":{"name":"民办基础教育","parent":"516"},
-"579":{"name":"广告","parent":"517"},
-"580":{"name":"媒体","parent":"517"},
-"581":{"name":"会展","parent":"517"},
-"582":{"name":"公关","parent":"517"},
-"583":{"name":"影视","parent":"517"},
-"584":{"name":"艺术","parent":"517"},
-"585":{"name":"文化传播","parent":"517"},
-"586":{"name":"娱乐","parent":"517"},
-"587":{"name":"体育","parent":"517"},
-"588":{"name":"出版","parent":"517"},
-"589":{"name":"休闲","parent":"517"},
-"590":{"name":"动漫","parent":"517"},
-"591":{"name":"市场推广","parent":"517"},
-"592":{"name":"市场研究","parent":"517"},
-"593":{"name":"食品","parent":"1129"},
-"594":{"name":"饮料","parent":"1129"},
-"595":{"name":"烟草","parent":"1129"},
-"596":{"name":"酒品","parent":"518"},
-"597":{"name":"服饰","parent":"518"},
-"598":{"name":"纺织","parent":"518"},
-"599":{"name":"化妆品","parent":"1129"},
-"600":{"name":"日用品","parent":"1129"},
-"601":{"name":"家电","parent":"518"},
-"602":{"name":"家具","parent":"518"},
-"603":{"name":"办公用品","parent":"518"},
-"604":{"name":"奢侈品","parent":"518"},
-"605":{"name":"珠宝","parent":"518"},
-"606":{"name":"数码产品","parent":"518"},
-"607":{"name":"玩具","parent":"518"},
-"608":{"name":"图书","parent":"518"},
-"609":{"name":"音像","parent":"518"},
-"610":{"name":"钟表","parent":"518"},
-"611":{"name":"箱包","parent":"518"},
-"612":{"name":"母婴","parent":"518"},
-"613":{"name":"营养保健","parent":"518"},
-"614":{"name":"户外用品","parent":"518"},
-"615":{"name":"健身器材","parent":"518"},
-"616":{"name":"乐器","parent":"518"},
-"617":{"name":"汽车用品","parent":"518"},
-"619":{"name":"厨具","parent":"518"},
-"620":{"name":"机械制造","parent":"519"},
-"621":{"name":"流体控制","parent":"519"},
-"622":{"name":"自动化控制","parent":"519"},
-"623":{"name":"仪器仪表","parent":"519"},
-"624":{"name":"航空/航天","parent":"519"},
-"625":{"name":"交通设施","parent":"519"},
-"626":{"name":"工业电子","parent":"519"},
-"627":{"name":"建材","parent":"519"},
-"628":{"name":"五金材料","parent":"519"},
-"629":{"name":"汽车","parent":"519"},
-"630":{"name":"印刷","parent":"519"},
-"631":{"name":"造纸","parent":"519"},
-"632":{"name":"包装","parent":"519"},
-"633":{"name":"原材料及加工","parent":"519"},
-"634":{"name":"物流","parent":"520"},
-"635":{"name":"仓储","parent":"520"},
-"636":{"name":"客运","parent":"520"},
-"637":{"name":"快递","parent":"520"},
-"638":{"name":"化学药","parent":"522"},
-"639":{"name":"中药","parent":"522"},
-"640":{"name":"生物制药","parent":"522"},
-"641":{"name":"兽药","parent":"522"},
-"642":{"name":"农药","parent":"522"},
-"643":{"name":"CRO","parent":"522"},
-"644":{"name":"消毒","parent":"522"},
-"645":{"name":"医药商业","parent":"522"},
-"646":{"name":"医疗服务","parent":"522"},
-"647":{"name":"医疗器械","parent":"523"},
-"648":{"name":"制药设备","parent":"523"},
-"649":{"name":"医用耗材","parent":"523"},
-"650":{"name":"手术器械","parent":"523"},
-"651":{"name":"保健器材","parent":"524"},
-"652":{"name":"性保健品","parent":"524"},
-"653":{"name":"医药保养","parent":"524"},
-"654":{"name":"医用保健","parent":"524"},
-"655":{"name":"酒店","parent":"525"},
-"656":{"name":"餐饮","parent":"525"},
-"657":{"name":"旅游","parent":"525"},
-"658":{"name":"生活服务","parent":"525"},
-"659":{"name":"保健服务","parent":"525"},
-"660":{"name":"运动健身","parent":"525"},
-"661":{"name":"家政服务","parent":"525"},
-"662":{"name":"婚庆服务","parent":"525"},
-"663":{"name":"租赁服务","parent":"525"},
-"664":{"name":"维修服务","parent":"525"},
-"665":{"name":"石油天然气","parent":"526"},
-"666":{"name":"电力","parent":"526"},
-"667":{"name":"新能源","parent":"526"},
-"668":{"name":"水利","parent":"526"},
-"669":{"name":"矿产","parent":"526"},
-"670":{"name":"采掘业","parent":"526"},
-"671":{"name":"冶炼","parent":"526"},
-"672":{"name":"环保","parent":"526"},
-"673":{"name":"无机化工原料","parent":"527"},
-"674":{"name":"有机化工原料","parent":"527"},
-"675":{"name":"精细化学品","parent":"527"},
-"676":{"name":"化工设备","parent":"527"},
-"677":{"name":"化工工程","parent":"527"},
-"678":{"name":"资产管理","parent":"513"},
-"679":{"name":"金融租赁","parent":"513"},
-"680":{"name":"征信及信评机构","parent":"513"},
-"681":{"name":"资产评估机构","parent":"513"},
-"683":{"name":"金融监管机构","parent":"513"},
-"684":{"name":"国际贸易","parent":"521"},
-"685":{"name":"海关","parent":"521"},
-"686":{"name":"购物中心","parent":"536"},
-"687":{"name":"超市","parent":"536"},
-"688":{"name":"便利店","parent":"536"},
-"689":{"name":"专卖店","parent":"536"},
-"690":{"name":"专业店","parent":"536"},
-"691":{"name":"百货店","parent":"536"},
-"692":{"name":"杂货店","parent":"536"},
-"693":{"name":"个人银行","parent":"537"},
-"695":{"name":"私人银行","parent":"537"},
-"696":{"name":"公司银行","parent":"537"},
-"697":{"name":"投资银行","parent":"537"},
-"698":{"name":"政策性银行","parent":"537"},
-"699":{"name":"中央银行","parent":"537"},
-"700":{"name":"人寿险","parent":"538"},
-"701":{"name":"财产险","parent":"538"},
-"702":{"name":"再保险","parent":"538"},
-"703":{"name":"养老险","parent":"538"},
-"704":{"name":"保险代理公司","parent":"538"},
-"705":{"name":"公募基金","parent":"540"},
-"707":{"name":"私募基金","parent":"540"},
-"708":{"name":"第三方理财","parent":"679"},
-"709":{"name":"资产管理公司","parent":"679"},
-"711":{"name":"房产中介","parent":"566"},
-"712":{"name":"职业中介","parent":"566"},
-"713":{"name":"婚姻中介","parent":"566"},
-"714":{"name":"战略咨询","parent":"567"},
-"715":{"name":"投资咨询","parent":"567"},
-"716":{"name":"心理咨询","parent":"567"},
-"717":{"name":"留学移民咨询","parent":"567"},
-"718":{"name":"工商注册代理","parent":"568"},
-"719":{"name":"商标专利代理","parent":"568"},
-"720":{"name":"财务代理","parent":"568"},
-"721":{"name":"工程机械","parent":"620"},
-"722":{"name":"农业机械","parent":"620"},
-"723":{"name":"海工设备","parent":"620"},
-"724":{"name":"包装机械","parent":"620"},
-"725":{"name":"印刷机械","parent":"620"},
-"726":{"name":"数控机床","parent":"620"},
-"727":{"name":"矿山机械","parent":"620"},
-"728":{"name":"水泵","parent":"621"},
-"729":{"name":"管道","parent":"621"},
-"730":{"name":"阀门","parent":"621"},
-"732":{"name":"压缩机","parent":"621"},
-"733":{"name":"集散控制系统","parent":"622"},
-"734":{"name":"远程控制","parent":"622"},
-"735":{"name":"液压系统","parent":"622"},
-"736":{"name":"楼宇智能化","parent":"622"},
-"737":{"name":"飞机制造","parent":"624"},
-"738":{"name":"航空公司","parent":"624"},
-"739":{"name":"发动机","parent":"624"},
-"740":{"name":"复合材料","parent":"624"},
-"741":{"name":"高铁","parent":"625"},
-"742":{"name":"地铁","parent":"625"},
-"743":{"name":"信号传输","parent":"625"},
-"745":{"name":"结构材料","parent":"627"},
-"746":{"name":"装饰材料","parent":"627"},
-"747":{"name":"专用材料","parent":"627"},
-"749":{"name":"经销商集团","parent":"629"},
-"750":{"name":"整车制造","parent":"629"},
-"751":{"name":"汽车零配件","parent":"629"},
-"752":{"name":"外型设计","parent":"629"},
-"753":{"name":"平版印刷","parent":"630"},
-"754":{"name":"凸版印刷","parent":"630"},
-"755":{"name":"凹版印刷","parent":"630"},
-"756":{"name":"孔版印刷","parent":"630"},
-"757":{"name":"印刷用纸","parent":"631"},
-"758":{"name":"书写、制图及复制用纸","parent":"631"},
-"759":{"name":"包装用纸","parent":"631"},
-"760":{"name":"生活、卫生及装饰用纸","parent":"631"},
-"761":{"name":"技术用纸","parent":"631"},
-"762":{"name":"加工纸原纸","parent":"631"},
-"763":{"name":"食品包装","parent":"632"},
-"764":{"name":"医药包装","parent":"632"},
-"765":{"name":"日化包装","parent":"632"},
-"766":{"name":"物流包装","parent":"632"},
-"767":{"name":"礼品包装","parent":"632"},
-"768":{"name":"电子五金包装","parent":"632"},
-"769":{"name":"汽车服务","parent":"525"},
-"770":{"name":"汽车保养","parent":"769"},
-"771":{"name":"租车","parent":"769"},
-"773":{"name":"出租车","parent":"769"},
-"774":{"name":"代驾","parent":"769"},
-"775":{"name":"发电","parent":"666"},
-"777":{"name":"输配电","parent":"666"},
-"779":{"name":"风电","parent":"667"},
-"780":{"name":"光伏/太阳能","parent":"667"},
-"781":{"name":"生物质发电","parent":"667"},
-"782":{"name":"煤化工","parent":"667"},
-"783":{"name":"垃圾发电","parent":"667"},
-"784":{"name":"核电","parent":"667"},
-"785":{"name":"能源矿产","parent":"669"},
-"786":{"name":"金属矿产","parent":"669"},
-"787":{"name":"非金属矿产","parent":"669"},
-"788":{"name":"水气矿产","parent":"669"},
-"789":{"name":"锅炉","parent":"775"},
-"790":{"name":"发电机","parent":"775"},
-"791":{"name":"汽轮机","parent":"775"},
-"792":{"name":"燃机","parent":"775"},
-"793":{"name":"冷却","parent":"775"},
-"794":{"name":"电力设计院","parent":"775"},
-"795":{"name":"高压输配电","parent":"777"},
-"796":{"name":"中压输配电","parent":"777"},
-"797":{"name":"低压输配电","parent":"777"},
-"798":{"name":"继电保护","parent":"777"},
-"799":{"name":"智能电网","parent":"777"},
-"800":{"name":"小学","parent":"516"},
-"801":{"name":"电动车","parent":"519"},
-"802":{"name":"皮具箱包","parent":"518"},
-"803":{"name":"医药制造","parent":"522"},
-"804":{"name":"电器销售","parent":"536"},
-"805":{"name":"塑料制品","parent":"527"},
-"806":{"name":"公益基金会","parent":"530"},
-"807":{"name":"美发服务","parent":"525"},
-"808":{"name":"农业养殖","parent":"531"},
-"809":{"name":"金融服务","parent":"513"},
-"810":{"name":"商业地产综合体","parent":"514"},
-"811":{"name":"美容服务","parent":"525"},
-"812":{"name":"灯饰","parent":"518"},
-"813":{"name":"油墨颜料产品","parent":"527"},
-"814":{"name":"眼镜制造","parent":"518"},
-"815":{"name":"农业生物技术","parent":"531"},
-"816":{"name":"体育用品","parent":"518"},
-"817":{"name":"保健用品","parent":"524"},
-"818":{"name":"化学化工产品","parent":"527"},
-"819":{"name":"饲料","parent":"531"},
-"821":{"name":"保安服务","parent":"525"},
-"822":{"name":"干细胞技术","parent":"522"},
-"824":{"name":"农药化肥","parent":"527"},
-"825":{"name":"卫生洁具","parent":"518"},
-"826":{"name":"体育器材、场馆","parent":"518"},
-"827":{"name":"饲料加工","parent":"531"},
-"828":{"name":"测绘服务","parent":"529"},
-"830":{"name":"金属船舶制造","parent":"519"},
-"831":{"name":"基因工程","parent":"522"},
-"832":{"name":"花卉服务","parent":"536"},
-"833":{"name":"农业种植","parent":"531"},
-"834":{"name":"皮革制品","parent":"518"},
-"835":{"name":"地理信息加工服务","parent":"529"},
-"836":{"name":"机器人","parent":"519"},
-"837":{"name":"礼品","parent":"518"},
-"838":{"name":"理发及美容服务","parent":"525"},
-"839":{"name":"其他清洁服务","parent":"525"},
-"840":{"name":"硅胶材料","parent":"527"},
-"841":{"name":"茶叶销售","parent":"518"},
-"842":{"name":"彩票活动","parent":"529"},
-"843":{"name":"化妆培训","parent":"516"},
-"844":{"name":"鞋业","parent":"518"},
-"845":{"name":"酒店用品","parent":"518"},
-"846":{"name":"复合材料","parent":"527"},
-"847":{"name":"房地产工程建设","parent":"548"},
-"848":{"name":"知识产权服务","parent":"559"},
-"849":{"name":"新型建材","parent":"627"},
-"850":{"name":"企业投资咨询","parent":"567"},
-"851":{"name":"含乳饮料和植物蛋白饮料制造","parent":"594"},
-"852":{"name":"汽车检测设备","parent":"629"},
-"853":{"name":"手机通讯器材","parent":"417"},
-"854":{"name":"环保材料","parent":"672"},
-"855":{"name":"交通设施","parent":"554"},
-"856":{"name":"电子器件","parent":"419"},
-"857":{"name":"啤酒","parent":"594"},
-"858":{"name":"生态旅游","parent":"657"},
-"859":{"name":"自动化设备","parent":"626"},
-"860":{"name":"软件开发","parent":"414"},
-"861":{"name":"葡萄酒销售","parent":"594"},
-"862":{"name":"钢材","parent":"633"},
-"863":{"name":"餐饮培训","parent":"656"},
-"864":{"name":"速冻食品","parent":"593"},
-"865":{"name":"空气环保","parent":"672"},
-"866":{"name":"互联网房地产经纪服务","parent":"550"},
-"867":{"name":"食品添加剂","parent":"593"},
-"868":{"name":"演艺传播","parent":"585"},
-"869":{"name":"信用卡","parent":"537"},
-"870":{"name":"报纸期刊广告","parent":"579"},
-"871":{"name":"摄影","parent":"525"},
-"872":{"name":"手机软件","parent":"414"},
-"873":{"name":"地坪建材","parent":"627"},
-"874":{"name":"企业管理咨询","parent":"567"},
-"875":{"name":"幼儿教育","parent":"570"},
-"876":{"name":"系统集成","parent":"416"},
-"877":{"name":"皮革服饰","parent":"597"},
-"878":{"name":"保健食品","parent":"593"},
-"879":{"name":"叉车","parent":"620"},
-"880":{"name":"厨卫电器","parent":"601"},
-"882":{"name":"地暖设备","parent":"627"},
-"883":{"name":"钢结构制造","parent":"548"},
-"884":{"name":"投影机","parent":"606"},
-"885":{"name":"啤酒销售","parent":"594"},
-"886":{"name":"度假村旅游","parent":"657"},
-"887":{"name":"电力元件设备","parent":"626"},
-"888":{"name":"管理软件","parent":"414"},
-"889":{"name":"轴承","parent":"628"},
-"890":{"name":"餐饮设备","parent":"656"},
-"891":{"name":"肉制品及副产品加工","parent":"593"},
-"892":{"name":"艺术收藏品投资交易","parent":"584"},
-"893":{"name":"净水器","parent":"601"},
-"894":{"name":"进口食品","parent":"593"},
-"895":{"name":"娱乐文化传播","parent":"585"},
-"896":{"name":"文化传播","parent":"585"},
-"897":{"name":"商旅传媒","parent":"580"},
-"898":{"name":"广告设计制作","parent":"579"},
-"899":{"name":"金属丝绳及其制品制造","parent":"627"},
-"900":{"name":"建筑涂料","parent":"627"},
-"901":{"name":"抵押贷款","parent":"543"},
-"902":{"name":"早教","parent":"570"},
-"903":{"name":"电影放映","parent":"583"},
-"904":{"name":"内衣服饰","parent":"597"},
-"905":{"name":"无线网络通信","parent":"418"},
-"906":{"name":"记忆卡","parent":"415"},
-"907":{"name":"女装服饰","parent":"597"},
-"908":{"name":"建筑机械","parent":"620"},
-"909":{"name":"制冷电器","parent":"601"},
-"910":{"name":"通信设备","parent":"417"},
-"911":{"name":"空调设备","parent":"601"},
-"912":{"name":"建筑装饰","parent":"553"},
-"913":{"name":"办公设备","parent":"603"},
-"916":{"name":"数据处理软件","parent":"414"},
-"917":{"name":"葡萄酒贸易","parent":"594"},
-"918":{"name":"通讯器材","parent":"417"},
-"919":{"name":"铜业","parent":"633"},
-"920":{"name":"食堂","parent":"656"},
-"921":{"name":"糖果零食","parent":"593"},
-"922":{"name":"文化艺术传播","parent":"584"},
-"923":{"name":"太阳能电器","parent":"601"},
-"924":{"name":"药品零售","parent":"645"},
-"925":{"name":"果蔬食品","parent":"593"},
-"926":{"name":"文化活动策划","parent":"585"},
-"928":{"name":"汽车广告","parent":"657"},
-"929":{"name":"条码设备","parent":"630"},
-"930":{"name":"建筑石材","parent":"627"},
-"931":{"name":"贵金属","parent":"545"},
-"932":{"name":"体育","parent":"660"},
-"933":{"name":"金融信息服务","parent":"414"},
-"934":{"name":"玻璃建材","parent":"627"},
-"935":{"name":"家教","parent":"569"},
-"936":{"name":"歌舞厅娱乐活动","parent":"586"},
-"937":{"name":"计算机服务器","parent":"415"},
-"938":{"name":"管道","parent":"627"},
-"939":{"name":"婴幼儿服饰","parent":"597"},
-"940":{"name":"热水器","parent":"601"},
-"941":{"name":"计算机及零部件制造","parent":"415"},
-"942":{"name":"钢铁贸易","parent":"633"},
-"944":{"name":"包装材料","parent":"632"},
-"945":{"name":"计算机办公设备","parent":"603"},
-"946":{"name":"白酒","parent":"594"},
-"948":{"name":"发动机","parent":"620"},
-"949":{"name":"快餐服务","parent":"656"},
-"950":{"name":"酒类销售","parent":"594"},
-"951":{"name":"电子产品、机电设备","parent":"626"},
-"952":{"name":"激光设备","parent":"626"},
-"953":{"name":"餐饮策划","parent":"656"},
-"954":{"name":"饮料、食品","parent":"594"},
-"955":{"name":"文化娱乐经纪","parent":"585"},
-"956":{"name":"天然气","parent":"665"},
-"957":{"name":"农副食品","parent":"593"},
-"958":{"name":"艺术表演","parent":"585"},
-"959":{"name":"石膏、水泥制品及类似制品制造","parent":"627"},
-"960":{"name":"橱柜","parent":"602"},
-"961":{"name":"管理培训","parent":"577"},
-"962":{"name":"男装服饰","parent":"597"},
-"963":{"name":"化肥制造","parent":"675"},
-"964":{"name":"童装服饰","parent":"597"},
-"965":{"name":"电源电池","parent":"626"},
-"966":{"name":"家电维修","parent":"664"},
-"967":{"name":"光电子器件","parent":"419"},
-"968":{"name":"旅行社服务","parent":"657"},
-"969":{"name":"电线、电缆制造","parent":"626"},
-"970":{"name":"软件开发、信息系统集成","parent":"419"},
-"971":{"name":"白酒制造","parent":"594"},
-"973":{"name":"甜品服务","parent":"656"},
-"974":{"name":"糕点、面包制造","parent":"593"},
-"975":{"name":"木工机械","parent":"620"},
-"976":{"name":"酒吧服务","parent":"656"},
-"977":{"name":"火腿肠","parent":"593"},
-"978":{"name":"广告策划推广","parent":"579"},
-"979":{"name":"新能源产品和生产装备制造","parent":"667"},
-"980":{"name":"调味品","parent":"593"},
-"981":{"name":"礼仪表演","parent":"585"},
-"982":{"name":"劳务派遣","parent":"560"},
-"983":{"name":"建材零售","parent":"627"},
-"984":{"name":"商品交易中心","parent":"545"},
-"985":{"name":"体育推广","parent":"585"},
-"986":{"name":"茶饮料及其他饮料制造","parent":"594"},
-"987":{"name":"金属建材","parent":"627"},
-"988":{"name":"职业技能培训","parent":"571"},
-"989":{"name":"网吧活动","parent":"586"},
-"990":{"name":"洗衣服务","parent":"658"},
-"991":{"name":"管道工程","parent":"554"},
-"992":{"name":"通信工程","parent":"417"},
-"993":{"name":"电子元器件","parent":"626"},
-"994":{"name":"电子设备","parent":"419"},
-"995":{"name":"茶馆服务","parent":"656"},
-"996":{"name":"旅游开发","parent":"657"},
-"997":{"name":"视频通讯","parent":"417"},
-"998":{"name":"白酒销售","parent":"594"},
-"1000":{"name":"咖啡馆服务","parent":"656"},
-"1001":{"name":"食品零售","parent":"593"},
-"1002":{"name":"健康疗养旅游","parent":"655"},
-"1003":{"name":"粮油食品","parent":"593"},
-"1004":{"name":"儿童教育影视","parent":"583"},
-"1005":{"name":"新能源发电","parent":"667"},
-"1006":{"name":"旅游策划","parent":"657"},
-"1007":{"name":"绘画","parent":"575"},
-"1008":{"name":"方便面及其他方便食品","parent":"593"},
-"1009":{"name":"房地产经纪","parent":"550"},
-"1010":{"name":"母婴家政","parent":"661"},
-"1011":{"name":"居家养老健康服务","parent":"661"},
-"1012":{"name":"文化艺术投资","parent":"545"},
-"1013":{"name":"运动健身","parent":"660"},
-"1014":{"name":"瓶(罐)装饮用水制造","parent":"594"},
-"1015":{"name":"金属门窗","parent":"627"},
-"1016":{"name":"机动车检测","parent":"563"},
-"1017":{"name":"货物运输","parent":"634"},
-"1018":{"name":"服饰专卖","parent":"690"},
-"1019":{"name":"酒店服装","parent":"597"},
-"1020":{"name":"通讯软件","parent":"417"},
-"1021":{"name":"消防工程","parent":"554"},
-"1022":{"name":"嵌入式电子系统","parent":"419"},
-"1023":{"name":"航空票务","parent":"636"},
-"1024":{"name":"电气设备","parent":"626"},
-"1025":{"name":"酒业贸易","parent":"594"},
-"1027":{"name":"其他饮料及冷饮服务","parent":"656"},
-"1028":{"name":"乳制品","parent":"593"},
-"1029":{"name":"新闻期刊出版","parent":"588"},
-"1030":{"name":"水污染治理","parent":"672"},
-"1031":{"name":"谷物食品","parent":"593"},
-"1032":{"name":"数字动漫设计制造服务","parent":"590"},
-"1033":{"name":"医院","parent":"646"},
-"1034":{"name":"旅游广告","parent":"657"},
-"1035":{"name":"办公家具","parent":"602"},
-"1036":{"name":"房地产营销策划","parent":"550"},
-"1037":{"name":"保洁家政","parent":"661"},
-"1038":{"name":"水泥制造","parent":"627"},
-"1039":{"name":"市场研究咨询","parent":"567"},
-"1040":{"name":"驾校","parent":"571"},
-"1041":{"name":"正餐服务","parent":"656"},
-"1043":{"name":"机动车燃油","parent":"665"},
-"1044":{"name":"食品","parent":"593"},
-"1045":{"name":"新能源汽车","parent":"629"},
-"1046":{"name":"手机无线网络推广","parent":"417"},
-"1047":{"name":"环保设备","parent":"672"},
-"1048":{"name":"通讯工程","parent":"418"},
-"1049":{"name":"半导体集成电路","parent":"419"},
-"1050":{"name":"航空服务","parent":"636"},
-"1051":{"name":"电机设备","parent":"626"},
-"1052":{"name":"档案软件","parent":"414"},
-"1053":{"name":"冷链物流服务","parent":"634"},
-"1054":{"name":"小吃服务","parent":"656"},
-"1055":{"name":"水产品加工","parent":"593"},
-"1056":{"name":"图书出版","parent":"588"},
-"1057":{"name":"固体废物治理","parent":"672"},
-"1059":{"name":"坚果食品","parent":"593"},
-"1060":{"name":"广告传媒","parent":"579"},
-"1061":{"name":"电梯","parent":"622"},
-"1062":{"name":"社区医疗与卫生院","parent":"646"},
-"1063":{"name":"广告、印刷包装","parent":"630"},
-"1064":{"name":"婚纱礼服","parent":"662"},
-"1065":{"name":"地毯","parent":"602"},
-"1066":{"name":"互联网物业","parent":"551"},
-"1067":{"name":"跨境电商","parent":"3"},
-"1068":{"name":"信息安全、系统集成","parent":"9"},
-"1069":{"name":"专用汽车制造","parent":"750"},
-"1070":{"name":"商品贸易","parent":"3"},
-"1071":{"name":"墙壁装饰材料","parent":"746"},
-"1072":{"name":"窗帘装饰材料","parent":"746"},
-"1073":{"name":"电子商务、本地生活服务","parent":"3"},
-"1075":{"name":"白酒电子商务","parent":"3"},
-"1076":{"name":"商品贸易、电子商务","parent":"3"},
-"1077":{"name":"木质装饰材料","parent":"746"},
-"1078":{"name":"电子商务、汽车电商交易平台","parent":"3"},
-"1079":{"name":"汽车轮胎","parent":"751"},
-"1080":{"name":"气体压缩机械制造","parent":"732"},
-"1081":{"name":"家装家具电子商务","parent":"3"},
-"1082":{"name":"化妆品电子商务","parent":"3"},
-"1083":{"name":"汽车销售","parent":"749"},
-"1084":{"name":"新闻资讯网站","parent":"510"},
-"1085":{"name":"母婴电商","parent":"3"},
-"1086":{"name":"电商商务、收藏品交易","parent":"3"},
-"1088":{"name":"电子商务、数码产品","parent":"3"},
-"1089":{"name":"二手车交易","parent":"749"},
-"1090":{"name":"游戏制作服务","parent":"5"},
-"1091":{"name":"母婴服务","parent":"510"},
-"1092":{"name":"家具电子商务","parent":"3"},
-"1093":{"name":"汽车配件电子商务","parent":"3"},
-"1094":{"name":"输配电设备","parent":"777"},
-"1095":{"name":"矿山设备","parent":"727"},
-"1096":{"name":"机床机械","parent":"726"},
-"1097":{"name":"农产品电商","parent":"3"},
-"1098":{"name":"陶瓷装饰材料","parent":"746"},
-"1099":{"name":"车载联网设备","parent":"487"},
-"1100":{"name":"汽车销售电子商务","parent":"3"},
-"1101":{"name":"石油设备","parent":"730"},
-"1102":{"name":"智能家居","parent":"487"},
-"1103":{"name":"散热器","parent":"751"},
-"1104":{"name":"电力工程","parent":"775"},
-"1105":{"name":"生鲜电商","parent":"3"},
-"1106":{"name":"互联网数据服务","parent":"490"},
-"1107":{"name":"房车、商务车销售","parent":"749"},
-"1108":{"name":"茶叶电子商务","parent":"3"},
-"1109":{"name":"酒类电子商务","parent":"3"},
-"1110":{"name":"阀门","parent":"730"},
-"1111":{"name":"食品电商","parent":"3"},
-"1112":{"name":"儿童摄影","parent":"871"},
-"1113":{"name":"广告摄影","parent":"871"},
-"1114":{"name":"婚纱摄影","parent":"871"},
-"1115":{"name":"模具制造","parent":"620"},
-"1116":{"name":"汽车模具","parent":"629"},
-"1117":{"name":"认证咨询","parent":"567"},
-"1118":{"name":"数字视觉制作服务","parent":"590"},
-"1119":{"name":"牙科及医疗器械","parent":"646"},
-"1120":{"name":"猎头招聘","parent":"560"},
-"1121":{"name":"家居","parent":"518"},
-"1122":{"name":"收藏品","parent":"518"},
-"1123":{"name":"首饰","parent":"518"},
-"1124":{"name":"工艺品","parent":"518"},
-"1125":{"name":"财务","parent":"515"},
-"1126":{"name":"税务","parent":"515"},
-"1127":{"name":"分类信息","parent":"2"},
-"1128":{"name":"宠物","parent":"0"},
-"1129":{"name":"快消品","parent":"518"},
-"1130":{"name":"人工智能","parent":"2"},
-"1131":{"name":"农/林/牧/渔","parent":"0"}
+TBL = {
+ "1": {"name": "IT/通信/电子", "parent": "0"},
+ "2": {"name": "互联网", "parent": "0"},
+ "3": {"name": "电子商务", "parent": "2"},
+ "4": {"name": "互联网金融", "parent": "2"},
+ "5": {"name": "网络游戏", "parent": "2"},
+ "6": {"name": "社交网络平台", "parent": "2"},
+ "7": {"name": "视频音乐", "parent": "2"},
+ "9": {"name": "安全", "parent": "2"},
+ "10": {"name": "云计算", "parent": "2"},
+ "12": {"name": "工具类客户端应用", "parent": "2"},
+ "13": {"name": "互联网广告", "parent": "2"},
+ "14": {"name": "企业互联网服务", "parent": "2"},
+ "16": {"name": "在线教育", "parent": "2"},
+ "17": {"name": "在线医疗", "parent": "2"},
+ "19": {"name": "B2B", "parent": "3"},
+ "20": {"name": "B2C", "parent": "3"},
+ "21": {"name": "C2C", "parent": "3"},
+ "22": {"name": "生活信息本地化", "parent": "3"},
+ "23": {"name": "在线旅游", "parent": "2"},
+ "24": {"name": "第三方支付", "parent": "4"},
+ "26": {"name": "客户端游戏", "parent": "5"},
+ "27": {"name": "网页游戏", "parent": "5"},
+ "28": {"name": "手机游戏", "parent": "5"},
+ "29": {"name": "微博", "parent": "6"},
+ "30": {"name": "社交网站", "parent": "6"},
+ "31": {"name": "在线视频", "parent": "7"},
+ "32": {"name": "在线音乐", "parent": "7"},
+ "35": {"name": "企业安全", "parent": "9"},
+ "36": {"name": "个人安全", "parent": "9"},
+ "37": {"name": "企业级云服务", "parent": "10"},
+ "38": {"name": "个人级云服务", "parent": "10"},
+ "43": {"name": "输入法", "parent": "12"},
+ "44": {"name": "浏览器", "parent": "12"},
+ "45": {"name": "词典", "parent": "12"},
+ "46": {"name": "播放器", "parent": "12"},
+ "47": {"name": "下载器", "parent": "12"},
+ "48": {"name": "IM", "parent": "12"},
+ "49": {"name": "广告服务", "parent": "13"},
+ "50": {"name": "第三方广告网络平台", "parent": "13"},
+ "51": {"name": "媒体代理", "parent": "13"},
+ "52": {"name": "创意代理", "parent": "13"},
+ "53": {"name": "IT-综合", "parent": "1"},
+ "71": {"name": "团购", "parent": "3"},
+ "72": {"name": "地图", "parent": "2"},
+ "73": {"name": "数据存储", "parent": "2"},
+ "414": {"name": "计算机软件", "parent": "1"},
+ "415": {"name": "计算机硬件", "parent": "1"},
+ "416": {"name": "计算机服务(系统、数据服务、维修)", "parent": "1"},
+ "417": {"name": "通信/电信/网络设备", "parent": "1"},
+ "418": {"name": "通信/电信运营、增值服务", "parent": "1"},
+ "419": {"name": "电子技术/半导体/集成电路", "parent": "1"},
+ "472": {"name": "P2P网贷", "parent": "4"},
+ "473": {"name": "互联网理财", "parent": "4"},
+ "474": {"name": "婚恋", "parent": "6"},
+ "476": {"name": "虚拟化", "parent": "10"},
+ "477": {"name": "邮箱", "parent": "12"},
+ "478": {"name": "商业智能", "parent": "14"},
+ "479": {"name": "企业建站", "parent": "14"},
+ "480": {"name": "安防", "parent": "14"},
+ "481": {"name": "网络营销", "parent": "2"},
+ "487": {"name": "智能终端", "parent": "2"},
+ "488": {"name": "移动互联网", "parent": "2"},
+ "489": {"name": "数字城市", "parent": "2"},
+ "490": {"name": "大数据", "parent": "2"},
+ "491": {"name": "互联网人力资源", "parent": "2"},
+ "492": {"name": "舆情监控", "parent": "2"},
+ "493": {"name": "移动营销", "parent": "481"},
+ "494": {"name": "微博营销", "parent": "481"},
+ "495": {"name": "精准营销", "parent": "481"},
+ "496": {"name": "海外营销", "parent": "481"},
+ "497": {"name": "微信营销", "parent": "481"},
+ "498": {"name": "智能手机", "parent": "487"},
+ "499": {"name": "可穿戴设备", "parent": "487"},
+ "500": {"name": "智能电视", "parent": "487"},
+ "501": {"name": "WAP", "parent": "488"},
+ "502": {"name": "物联网", "parent": "489"},
+ "503": {"name": "O2O", "parent": "489"},
+ "504": {"name": "数字出版", "parent": "489"},
+ "505": {"name": "搜索", "parent": "2"},
+ "506": {"name": "垂直搜索", "parent": "505"},
+ "507": {"name": "无线搜索", "parent": "505"},
+ "508": {"name": "网页搜索", "parent": "505"},
+ "509": {"name": "网址导航", "parent": "2"},
+ "510": {"name": "门户", "parent": "2"},
+ "511": {"name": "网络文学", "parent": "2"},
+ "512": {"name": "自媒体", "parent": "2"},
+ "513": {"name": "金融", "parent": "0"},
+ "514": {"name": "建筑与房地产", "parent": "0"},
+ "515": {"name": "专业服务", "parent": "0"},
+ "516": {"name": "教育培训", "parent": "0"},
+ "517": {"name": "文化传媒", "parent": "0"},
+ "518": {"name": "消费品", "parent": "0"},
+ "519": {"name": "工业", "parent": "0"},
+ "520": {"name": "交通物流", "parent": "0"},
+ "521": {"name": "贸易", "parent": "0"},
+ "522": {"name": "医药", "parent": "0"},
+ "523": {"name": "医疗器械", "parent": "522"},
+ "524": {"name": "保健品", "parent": "518"},
+ "525": {"name": "服务业", "parent": "0"},
+ "526": {"name": "能源/矿产/环保", "parent": "0"},
+ "527": {"name": "化工", "parent": "0"},
+ "528": {"name": "政府", "parent": "0"},
+ "529": {"name": "公共事业", "parent": "0"},
+ "530": {"name": "非盈利机构", "parent": "0"},
+ "531": {"name": "农业", "parent": "1131"},
+ "532": {"name": "林业", "parent": "1131"},
+ "533": {"name": "畜牧业", "parent": "1131"},
+ "534": {"name": "渔业", "parent": "1131"},
+ "535": {"name": "学术科研", "parent": "0"},
+ "536": {"name": "零售", "parent": "0"},
+ "537": {"name": "银行", "parent": "513"},
+ "538": {"name": "保险", "parent": "513"},
+ "539": {"name": "证券", "parent": "513"},
+ "540": {"name": "基金", "parent": "513"},
+ "541": {"name": "信托", "parent": "513"},
+ "542": {"name": "担保", "parent": "513"},
+ "543": {"name": "典当", "parent": "513"},
+ "544": {"name": "拍卖", "parent": "513"},
+ "545": {"name": "投资/融资", "parent": "513"},
+ "546": {"name": "期货", "parent": "513"},
+ "547": {"name": "房地产开发", "parent": "514"},
+ "548": {"name": "工程施工", "parent": "514"},
+ "549": {"name": "建筑设计", "parent": "514"},
+ "550": {"name": "房地产代理", "parent": "514"},
+ "551": {"name": "物业管理", "parent": "514"},
+ "552": {"name": "室内设计", "parent": "514"},
+ "553": {"name": "装修装潢", "parent": "514"},
+ "554": {"name": "市政工程", "parent": "514"},
+ "555": {"name": "工程造价", "parent": "514"},
+ "556": {"name": "工程监理", "parent": "514"},
+ "557": {"name": "环境工程", "parent": "514"},
+ "558": {"name": "园林景观", "parent": "514"},
+ "559": {"name": "法律", "parent": "515"},
+ "560": {"name": "人力资源", "parent": "515"},
+ "561": {"name": "会计", "parent": "1125"},
+ "562": {"name": "审计", "parent": "515"},
+ "563": {"name": "检测认证", "parent": "515"},
+ "565": {"name": "翻译", "parent": "515"},
+ "566": {"name": "中介", "parent": "515"},
+ "567": {"name": "咨询", "parent": "515"},
+ "568": {"name": "外包服务", "parent": "515"},
+ "569": {"name": "家教", "parent": "516"},
+ "570": {"name": "早教", "parent": "516"},
+ "571": {"name": "职业技能培训", "parent": "516"},
+ "572": {"name": "外语培训", "parent": "516"},
+ "573": {"name": "设计培训", "parent": "516"},
+ "574": {"name": "IT培训", "parent": "516"},
+ "575": {"name": "文艺体育培训", "parent": "516"},
+ "576": {"name": "学历教育", "parent": "516"},
+ "577": {"name": "管理培训", "parent": "516"},
+ "578": {"name": "民办基础教育", "parent": "516"},
+ "579": {"name": "广告", "parent": "517"},
+ "580": {"name": "媒体", "parent": "517"},
+ "581": {"name": "会展", "parent": "517"},
+ "582": {"name": "公关", "parent": "517"},
+ "583": {"name": "影视", "parent": "517"},
+ "584": {"name": "艺术", "parent": "517"},
+ "585": {"name": "文化传播", "parent": "517"},
+ "586": {"name": "娱乐", "parent": "517"},
+ "587": {"name": "体育", "parent": "517"},
+ "588": {"name": "出版", "parent": "517"},
+ "589": {"name": "休闲", "parent": "517"},
+ "590": {"name": "动漫", "parent": "517"},
+ "591": {"name": "市场推广", "parent": "517"},
+ "592": {"name": "市场研究", "parent": "517"},
+ "593": {"name": "食品", "parent": "1129"},
+ "594": {"name": "饮料", "parent": "1129"},
+ "595": {"name": "烟草", "parent": "1129"},
+ "596": {"name": "酒品", "parent": "518"},
+ "597": {"name": "服饰", "parent": "518"},
+ "598": {"name": "纺织", "parent": "518"},
+ "599": {"name": "化妆品", "parent": "1129"},
+ "600": {"name": "日用品", "parent": "1129"},
+ "601": {"name": "家电", "parent": "518"},
+ "602": {"name": "家具", "parent": "518"},
+ "603": {"name": "办公用品", "parent": "518"},
+ "604": {"name": "奢侈品", "parent": "518"},
+ "605": {"name": "珠宝", "parent": "518"},
+ "606": {"name": "数码产品", "parent": "518"},
+ "607": {"name": "玩具", "parent": "518"},
+ "608": {"name": "图书", "parent": "518"},
+ "609": {"name": "音像", "parent": "518"},
+ "610": {"name": "钟表", "parent": "518"},
+ "611": {"name": "箱包", "parent": "518"},
+ "612": {"name": "母婴", "parent": "518"},
+ "613": {"name": "营养保健", "parent": "518"},
+ "614": {"name": "户外用品", "parent": "518"},
+ "615": {"name": "健身器材", "parent": "518"},
+ "616": {"name": "乐器", "parent": "518"},
+ "617": {"name": "汽车用品", "parent": "518"},
+ "619": {"name": "厨具", "parent": "518"},
+ "620": {"name": "机械制造", "parent": "519"},
+ "621": {"name": "流体控制", "parent": "519"},
+ "622": {"name": "自动化控制", "parent": "519"},
+ "623": {"name": "仪器仪表", "parent": "519"},
+ "624": {"name": "航空/航天", "parent": "519"},
+ "625": {"name": "交通设施", "parent": "519"},
+ "626": {"name": "工业电子", "parent": "519"},
+ "627": {"name": "建材", "parent": "519"},
+ "628": {"name": "五金材料", "parent": "519"},
+ "629": {"name": "汽车", "parent": "519"},
+ "630": {"name": "印刷", "parent": "519"},
+ "631": {"name": "造纸", "parent": "519"},
+ "632": {"name": "包装", "parent": "519"},
+ "633": {"name": "原材料及加工", "parent": "519"},
+ "634": {"name": "物流", "parent": "520"},
+ "635": {"name": "仓储", "parent": "520"},
+ "636": {"name": "客运", "parent": "520"},
+ "637": {"name": "快递", "parent": "520"},
+ "638": {"name": "化学药", "parent": "522"},
+ "639": {"name": "中药", "parent": "522"},
+ "640": {"name": "生物制药", "parent": "522"},
+ "641": {"name": "兽药", "parent": "522"},
+ "642": {"name": "农药", "parent": "522"},
+ "643": {"name": "CRO", "parent": "522"},
+ "644": {"name": "消毒", "parent": "522"},
+ "645": {"name": "医药商业", "parent": "522"},
+ "646": {"name": "医疗服务", "parent": "522"},
+ "647": {"name": "医疗器械", "parent": "523"},
+ "648": {"name": "制药设备", "parent": "523"},
+ "649": {"name": "医用耗材", "parent": "523"},
+ "650": {"name": "手术器械", "parent": "523"},
+ "651": {"name": "保健器材", "parent": "524"},
+ "652": {"name": "性保健品", "parent": "524"},
+ "653": {"name": "医药保养", "parent": "524"},
+ "654": {"name": "医用保健", "parent": "524"},
+ "655": {"name": "酒店", "parent": "525"},
+ "656": {"name": "餐饮", "parent": "525"},
+ "657": {"name": "旅游", "parent": "525"},
+ "658": {"name": "生活服务", "parent": "525"},
+ "659": {"name": "保健服务", "parent": "525"},
+ "660": {"name": "运动健身", "parent": "525"},
+ "661": {"name": "家政服务", "parent": "525"},
+ "662": {"name": "婚庆服务", "parent": "525"},
+ "663": {"name": "租赁服务", "parent": "525"},
+ "664": {"name": "维修服务", "parent": "525"},
+ "665": {"name": "石油天然气", "parent": "526"},
+ "666": {"name": "电力", "parent": "526"},
+ "667": {"name": "新能源", "parent": "526"},
+ "668": {"name": "水利", "parent": "526"},
+ "669": {"name": "矿产", "parent": "526"},
+ "670": {"name": "采掘业", "parent": "526"},
+ "671": {"name": "冶炼", "parent": "526"},
+ "672": {"name": "环保", "parent": "526"},
+ "673": {"name": "无机化工原料", "parent": "527"},
+ "674": {"name": "有机化工原料", "parent": "527"},
+ "675": {"name": "精细化学品", "parent": "527"},
+ "676": {"name": "化工设备", "parent": "527"},
+ "677": {"name": "化工工程", "parent": "527"},
+ "678": {"name": "资产管理", "parent": "513"},
+ "679": {"name": "金融租赁", "parent": "513"},
+ "680": {"name": "征信及信评机构", "parent": "513"},
+ "681": {"name": "资产评估机构", "parent": "513"},
+ "683": {"name": "金融监管机构", "parent": "513"},
+ "684": {"name": "国际贸易", "parent": "521"},
+ "685": {"name": "海关", "parent": "521"},
+ "686": {"name": "购物中心", "parent": "536"},
+ "687": {"name": "超市", "parent": "536"},
+ "688": {"name": "便利店", "parent": "536"},
+ "689": {"name": "专卖店", "parent": "536"},
+ "690": {"name": "专业店", "parent": "536"},
+ "691": {"name": "百货店", "parent": "536"},
+ "692": {"name": "杂货店", "parent": "536"},
+ "693": {"name": "个人银行", "parent": "537"},
+ "695": {"name": "私人银行", "parent": "537"},
+ "696": {"name": "公司银行", "parent": "537"},
+ "697": {"name": "投资银行", "parent": "537"},
+ "698": {"name": "政策性银行", "parent": "537"},
+ "699": {"name": "中央银行", "parent": "537"},
+ "700": {"name": "人寿险", "parent": "538"},
+ "701": {"name": "财产险", "parent": "538"},
+ "702": {"name": "再保险", "parent": "538"},
+ "703": {"name": "养老险", "parent": "538"},
+ "704": {"name": "保险代理公司", "parent": "538"},
+ "705": {"name": "公募基金", "parent": "540"},
+ "707": {"name": "私募基金", "parent": "540"},
+ "708": {"name": "第三方理财", "parent": "679"},
+ "709": {"name": "资产管理公司", "parent": "679"},
+ "711": {"name": "房产中介", "parent": "566"},
+ "712": {"name": "职业中介", "parent": "566"},
+ "713": {"name": "婚姻中介", "parent": "566"},
+ "714": {"name": "战略咨询", "parent": "567"},
+ "715": {"name": "投资咨询", "parent": "567"},
+ "716": {"name": "心理咨询", "parent": "567"},
+ "717": {"name": "留学移民咨询", "parent": "567"},
+ "718": {"name": "工商注册代理", "parent": "568"},
+ "719": {"name": "商标专利代理", "parent": "568"},
+ "720": {"name": "财务代理", "parent": "568"},
+ "721": {"name": "工程机械", "parent": "620"},
+ "722": {"name": "农业机械", "parent": "620"},
+ "723": {"name": "海工设备", "parent": "620"},
+ "724": {"name": "包装机械", "parent": "620"},
+ "725": {"name": "印刷机械", "parent": "620"},
+ "726": {"name": "数控机床", "parent": "620"},
+ "727": {"name": "矿山机械", "parent": "620"},
+ "728": {"name": "水泵", "parent": "621"},
+ "729": {"name": "管道", "parent": "621"},
+ "730": {"name": "阀门", "parent": "621"},
+ "732": {"name": "压缩机", "parent": "621"},
+ "733": {"name": "集散控制系统", "parent": "622"},
+ "734": {"name": "远程控制", "parent": "622"},
+ "735": {"name": "液压系统", "parent": "622"},
+ "736": {"name": "楼宇智能化", "parent": "622"},
+ "737": {"name": "飞机制造", "parent": "624"},
+ "738": {"name": "航空公司", "parent": "624"},
+ "739": {"name": "发动机", "parent": "624"},
+ "740": {"name": "复合材料", "parent": "624"},
+ "741": {"name": "高铁", "parent": "625"},
+ "742": {"name": "地铁", "parent": "625"},
+ "743": {"name": "信号传输", "parent": "625"},
+ "745": {"name": "结构材料", "parent": "627"},
+ "746": {"name": "装饰材料", "parent": "627"},
+ "747": {"name": "专用材料", "parent": "627"},
+ "749": {"name": "经销商集团", "parent": "629"},
+ "750": {"name": "整车制造", "parent": "629"},
+ "751": {"name": "汽车零配件", "parent": "629"},
+ "752": {"name": "外型设计", "parent": "629"},
+ "753": {"name": "平版印刷", "parent": "630"},
+ "754": {"name": "凸版印刷", "parent": "630"},
+ "755": {"name": "凹版印刷", "parent": "630"},
+ "756": {"name": "孔版印刷", "parent": "630"},
+ "757": {"name": "印刷用纸", "parent": "631"},
+ "758": {"name": "书写、制图及复制用纸", "parent": "631"},
+ "759": {"name": "包装用纸", "parent": "631"},
+ "760": {"name": "生活、卫生及装饰用纸", "parent": "631"},
+ "761": {"name": "技术用纸", "parent": "631"},
+ "762": {"name": "加工纸原纸", "parent": "631"},
+ "763": {"name": "食品包装", "parent": "632"},
+ "764": {"name": "医药包装", "parent": "632"},
+ "765": {"name": "日化包装", "parent": "632"},
+ "766": {"name": "物流包装", "parent": "632"},
+ "767": {"name": "礼品包装", "parent": "632"},
+ "768": {"name": "电子五金包装", "parent": "632"},
+ "769": {"name": "汽车服务", "parent": "525"},
+ "770": {"name": "汽车保养", "parent": "769"},
+ "771": {"name": "租车", "parent": "769"},
+ "773": {"name": "出租车", "parent": "769"},
+ "774": {"name": "代驾", "parent": "769"},
+ "775": {"name": "发电", "parent": "666"},
+ "777": {"name": "输配电", "parent": "666"},
+ "779": {"name": "风电", "parent": "667"},
+ "780": {"name": "光伏/太阳能", "parent": "667"},
+ "781": {"name": "生物质发电", "parent": "667"},
+ "782": {"name": "煤化工", "parent": "667"},
+ "783": {"name": "垃圾发电", "parent": "667"},
+ "784": {"name": "核电", "parent": "667"},
+ "785": {"name": "能源矿产", "parent": "669"},
+ "786": {"name": "金属矿产", "parent": "669"},
+ "787": {"name": "非金属矿产", "parent": "669"},
+ "788": {"name": "水气矿产", "parent": "669"},
+ "789": {"name": "锅炉", "parent": "775"},
+ "790": {"name": "发电机", "parent": "775"},
+ "791": {"name": "汽轮机", "parent": "775"},
+ "792": {"name": "燃机", "parent": "775"},
+ "793": {"name": "冷却", "parent": "775"},
+ "794": {"name": "电力设计院", "parent": "775"},
+ "795": {"name": "高压输配电", "parent": "777"},
+ "796": {"name": "中压输配电", "parent": "777"},
+ "797": {"name": "低压输配电", "parent": "777"},
+ "798": {"name": "继电保护", "parent": "777"},
+ "799": {"name": "智能电网", "parent": "777"},
+ "800": {"name": "小学", "parent": "516"},
+ "801": {"name": "电动车", "parent": "519"},
+ "802": {"name": "皮具箱包", "parent": "518"},
+ "803": {"name": "医药制造", "parent": "522"},
+ "804": {"name": "电器销售", "parent": "536"},
+ "805": {"name": "塑料制品", "parent": "527"},
+ "806": {"name": "公益基金会", "parent": "530"},
+ "807": {"name": "美发服务", "parent": "525"},
+ "808": {"name": "农业养殖", "parent": "531"},
+ "809": {"name": "金融服务", "parent": "513"},
+ "810": {"name": "商业地产综合体", "parent": "514"},
+ "811": {"name": "美容服务", "parent": "525"},
+ "812": {"name": "灯饰", "parent": "518"},
+ "813": {"name": "油墨颜料产品", "parent": "527"},
+ "814": {"name": "眼镜制造", "parent": "518"},
+ "815": {"name": "农业生物技术", "parent": "531"},
+ "816": {"name": "体育用品", "parent": "518"},
+ "817": {"name": "保健用品", "parent": "524"},
+ "818": {"name": "化学化工产品", "parent": "527"},
+ "819": {"name": "饲料", "parent": "531"},
+ "821": {"name": "保安服务", "parent": "525"},
+ "822": {"name": "干细胞技术", "parent": "522"},
+ "824": {"name": "农药化肥", "parent": "527"},
+ "825": {"name": "卫生洁具", "parent": "518"},
+ "826": {"name": "体育器材、场馆", "parent": "518"},
+ "827": {"name": "饲料加工", "parent": "531"},
+ "828": {"name": "测绘服务", "parent": "529"},
+ "830": {"name": "金属船舶制造", "parent": "519"},
+ "831": {"name": "基因工程", "parent": "522"},
+ "832": {"name": "花卉服务", "parent": "536"},
+ "833": {"name": "农业种植", "parent": "531"},
+ "834": {"name": "皮革制品", "parent": "518"},
+ "835": {"name": "地理信息加工服务", "parent": "529"},
+ "836": {"name": "机器人", "parent": "519"},
+ "837": {"name": "礼品", "parent": "518"},
+ "838": {"name": "理发及美容服务", "parent": "525"},
+ "839": {"name": "其他清洁服务", "parent": "525"},
+ "840": {"name": "硅胶材料", "parent": "527"},
+ "841": {"name": "茶叶销售", "parent": "518"},
+ "842": {"name": "彩票活动", "parent": "529"},
+ "843": {"name": "化妆培训", "parent": "516"},
+ "844": {"name": "鞋业", "parent": "518"},
+ "845": {"name": "酒店用品", "parent": "518"},
+ "846": {"name": "复合材料", "parent": "527"},
+ "847": {"name": "房地产工程建设", "parent": "548"},
+ "848": {"name": "知识产权服务", "parent": "559"},
+ "849": {"name": "新型建材", "parent": "627"},
+ "850": {"name": "企业投资咨询", "parent": "567"},
+ "851": {"name": "含乳饮料和植物蛋白饮料制造", "parent": "594"},
+ "852": {"name": "汽车检测设备", "parent": "629"},
+ "853": {"name": "手机通讯器材", "parent": "417"},
+ "854": {"name": "环保材料", "parent": "672"},
+ "855": {"name": "交通设施", "parent": "554"},
+ "856": {"name": "电子器件", "parent": "419"},
+ "857": {"name": "啤酒", "parent": "594"},
+ "858": {"name": "生态旅游", "parent": "657"},
+ "859": {"name": "自动化设备", "parent": "626"},
+ "860": {"name": "软件开发", "parent": "414"},
+ "861": {"name": "葡萄酒销售", "parent": "594"},
+ "862": {"name": "钢材", "parent": "633"},
+ "863": {"name": "餐饮培训", "parent": "656"},
+ "864": {"name": "速冻食品", "parent": "593"},
+ "865": {"name": "空气环保", "parent": "672"},
+ "866": {"name": "互联网房地产经纪服务", "parent": "550"},
+ "867": {"name": "食品添加剂", "parent": "593"},
+ "868": {"name": "演艺传播", "parent": "585"},
+ "869": {"name": "信用卡", "parent": "537"},
+ "870": {"name": "报纸期刊广告", "parent": "579"},
+ "871": {"name": "摄影", "parent": "525"},
+ "872": {"name": "手机软件", "parent": "414"},
+ "873": {"name": "地坪建材", "parent": "627"},
+ "874": {"name": "企业管理咨询", "parent": "567"},
+ "875": {"name": "幼儿教育", "parent": "570"},
+ "876": {"name": "系统集成", "parent": "416"},
+ "877": {"name": "皮革服饰", "parent": "597"},
+ "878": {"name": "保健食品", "parent": "593"},
+ "879": {"name": "叉车", "parent": "620"},
+ "880": {"name": "厨卫电器", "parent": "601"},
+ "882": {"name": "地暖设备", "parent": "627"},
+ "883": {"name": "钢结构制造", "parent": "548"},
+ "884": {"name": "投影机", "parent": "606"},
+ "885": {"name": "啤酒销售", "parent": "594"},
+ "886": {"name": "度假村旅游", "parent": "657"},
+ "887": {"name": "电力元件设备", "parent": "626"},
+ "888": {"name": "管理软件", "parent": "414"},
+ "889": {"name": "轴承", "parent": "628"},
+ "890": {"name": "餐饮设备", "parent": "656"},
+ "891": {"name": "肉制品及副产品加工", "parent": "593"},
+ "892": {"name": "艺术收藏品投资交易", "parent": "584"},
+ "893": {"name": "净水器", "parent": "601"},
+ "894": {"name": "进口食品", "parent": "593"},
+ "895": {"name": "娱乐文化传播", "parent": "585"},
+ "896": {"name": "文化传播", "parent": "585"},
+ "897": {"name": "商旅传媒", "parent": "580"},
+ "898": {"name": "广告设计制作", "parent": "579"},
+ "899": {"name": "金属丝绳及其制品制造", "parent": "627"},
+ "900": {"name": "建筑涂料", "parent": "627"},
+ "901": {"name": "抵押贷款", "parent": "543"},
+ "902": {"name": "早教", "parent": "570"},
+ "903": {"name": "电影放映", "parent": "583"},
+ "904": {"name": "内衣服饰", "parent": "597"},
+ "905": {"name": "无线网络通信", "parent": "418"},
+ "906": {"name": "记忆卡", "parent": "415"},
+ "907": {"name": "女装服饰", "parent": "597"},
+ "908": {"name": "建筑机械", "parent": "620"},
+ "909": {"name": "制冷电器", "parent": "601"},
+ "910": {"name": "通信设备", "parent": "417"},
+ "911": {"name": "空调设备", "parent": "601"},
+ "912": {"name": "建筑装饰", "parent": "553"},
+ "913": {"name": "办公设备", "parent": "603"},
+ "916": {"name": "数据处理软件", "parent": "414"},
+ "917": {"name": "葡萄酒贸易", "parent": "594"},
+ "918": {"name": "通讯器材", "parent": "417"},
+ "919": {"name": "铜业", "parent": "633"},
+ "920": {"name": "食堂", "parent": "656"},
+ "921": {"name": "糖果零食", "parent": "593"},
+ "922": {"name": "文化艺术传播", "parent": "584"},
+ "923": {"name": "太阳能电器", "parent": "601"},
+ "924": {"name": "药品零售", "parent": "645"},
+ "925": {"name": "果蔬食品", "parent": "593"},
+ "926": {"name": "文化活动策划", "parent": "585"},
+ "928": {"name": "汽车广告", "parent": "657"},
+ "929": {"name": "条码设备", "parent": "630"},
+ "930": {"name": "建筑石材", "parent": "627"},
+ "931": {"name": "贵金属", "parent": "545"},
+ "932": {"name": "体育", "parent": "660"},
+ "933": {"name": "金融信息服务", "parent": "414"},
+ "934": {"name": "玻璃建材", "parent": "627"},
+ "935": {"name": "家教", "parent": "569"},
+ "936": {"name": "歌舞厅娱乐活动", "parent": "586"},
+ "937": {"name": "计算机服务器", "parent": "415"},
+ "938": {"name": "管道", "parent": "627"},
+ "939": {"name": "婴幼儿服饰", "parent": "597"},
+ "940": {"name": "热水器", "parent": "601"},
+ "941": {"name": "计算机及零部件制造", "parent": "415"},
+ "942": {"name": "钢铁贸易", "parent": "633"},
+ "944": {"name": "包装材料", "parent": "632"},
+ "945": {"name": "计算机办公设备", "parent": "603"},
+ "946": {"name": "白酒", "parent": "594"},
+ "948": {"name": "发动机", "parent": "620"},
+ "949": {"name": "快餐服务", "parent": "656"},
+ "950": {"name": "酒类销售", "parent": "594"},
+ "951": {"name": "电子产品、机电设备", "parent": "626"},
+ "952": {"name": "激光设备", "parent": "626"},
+ "953": {"name": "餐饮策划", "parent": "656"},
+ "954": {"name": "饮料、食品", "parent": "594"},
+ "955": {"name": "文化娱乐经纪", "parent": "585"},
+ "956": {"name": "天然气", "parent": "665"},
+ "957": {"name": "农副食品", "parent": "593"},
+ "958": {"name": "艺术表演", "parent": "585"},
+ "959": {"name": "石膏、水泥制品及类似制品制造", "parent": "627"},
+ "960": {"name": "橱柜", "parent": "602"},
+ "961": {"name": "管理培训", "parent": "577"},
+ "962": {"name": "男装服饰", "parent": "597"},
+ "963": {"name": "化肥制造", "parent": "675"},
+ "964": {"name": "童装服饰", "parent": "597"},
+ "965": {"name": "电源电池", "parent": "626"},
+ "966": {"name": "家电维修", "parent": "664"},
+ "967": {"name": "光电子器件", "parent": "419"},
+ "968": {"name": "旅行社服务", "parent": "657"},
+ "969": {"name": "电线、电缆制造", "parent": "626"},
+ "970": {"name": "软件开发、信息系统集成", "parent": "419"},
+ "971": {"name": "白酒制造", "parent": "594"},
+ "973": {"name": "甜品服务", "parent": "656"},
+ "974": {"name": "糕点、面包制造", "parent": "593"},
+ "975": {"name": "木工机械", "parent": "620"},
+ "976": {"name": "酒吧服务", "parent": "656"},
+ "977": {"name": "火腿肠", "parent": "593"},
+ "978": {"name": "广告策划推广", "parent": "579"},
+ "979": {"name": "新能源产品和生产装备制造", "parent": "667"},
+ "980": {"name": "调味品", "parent": "593"},
+ "981": {"name": "礼仪表演", "parent": "585"},
+ "982": {"name": "劳务派遣", "parent": "560"},
+ "983": {"name": "建材零售", "parent": "627"},
+ "984": {"name": "商品交易中心", "parent": "545"},
+ "985": {"name": "体育推广", "parent": "585"},
+ "986": {"name": "茶饮料及其他饮料制造", "parent": "594"},
+ "987": {"name": "金属建材", "parent": "627"},
+ "988": {"name": "职业技能培训", "parent": "571"},
+ "989": {"name": "网吧活动", "parent": "586"},
+ "990": {"name": "洗衣服务", "parent": "658"},
+ "991": {"name": "管道工程", "parent": "554"},
+ "992": {"name": "通信工程", "parent": "417"},
+ "993": {"name": "电子元器件", "parent": "626"},
+ "994": {"name": "电子设备", "parent": "419"},
+ "995": {"name": "茶馆服务", "parent": "656"},
+ "996": {"name": "旅游开发", "parent": "657"},
+ "997": {"name": "视频通讯", "parent": "417"},
+ "998": {"name": "白酒销售", "parent": "594"},
+ "1000": {"name": "咖啡馆服务", "parent": "656"},
+ "1001": {"name": "食品零售", "parent": "593"},
+ "1002": {"name": "健康疗养旅游", "parent": "655"},
+ "1003": {"name": "粮油食品", "parent": "593"},
+ "1004": {"name": "儿童教育影视", "parent": "583"},
+ "1005": {"name": "新能源发电", "parent": "667"},
+ "1006": {"name": "旅游策划", "parent": "657"},
+ "1007": {"name": "绘画", "parent": "575"},
+ "1008": {"name": "方便面及其他方便食品", "parent": "593"},
+ "1009": {"name": "房地产经纪", "parent": "550"},
+ "1010": {"name": "母婴家政", "parent": "661"},
+ "1011": {"name": "居家养老健康服务", "parent": "661"},
+ "1012": {"name": "文化艺术投资", "parent": "545"},
+ "1013": {"name": "运动健身", "parent": "660"},
+ "1014": {"name": "瓶(罐)装饮用水制造", "parent": "594"},
+ "1015": {"name": "金属门窗", "parent": "627"},
+ "1016": {"name": "机动车检测", "parent": "563"},
+ "1017": {"name": "货物运输", "parent": "634"},
+ "1018": {"name": "服饰专卖", "parent": "690"},
+ "1019": {"name": "酒店服装", "parent": "597"},
+ "1020": {"name": "通讯软件", "parent": "417"},
+ "1021": {"name": "消防工程", "parent": "554"},
+ "1022": {"name": "嵌入式电子系统", "parent": "419"},
+ "1023": {"name": "航空票务", "parent": "636"},
+ "1024": {"name": "电气设备", "parent": "626"},
+ "1025": {"name": "酒业贸易", "parent": "594"},
+ "1027": {"name": "其他饮料及冷饮服务", "parent": "656"},
+ "1028": {"name": "乳制品", "parent": "593"},
+ "1029": {"name": "新闻期刊出版", "parent": "588"},
+ "1030": {"name": "水污染治理", "parent": "672"},
+ "1031": {"name": "谷物食品", "parent": "593"},
+ "1032": {"name": "数字动漫设计制造服务", "parent": "590"},
+ "1033": {"name": "医院", "parent": "646"},
+ "1034": {"name": "旅游广告", "parent": "657"},
+ "1035": {"name": "办公家具", "parent": "602"},
+ "1036": {"name": "房地产营销策划", "parent": "550"},
+ "1037": {"name": "保洁家政", "parent": "661"},
+ "1038": {"name": "水泥制造", "parent": "627"},
+ "1039": {"name": "市场研究咨询", "parent": "567"},
+ "1040": {"name": "驾校", "parent": "571"},
+ "1041": {"name": "正餐服务", "parent": "656"},
+ "1043": {"name": "机动车燃油", "parent": "665"},
+ "1044": {"name": "食品", "parent": "593"},
+ "1045": {"name": "新能源汽车", "parent": "629"},
+ "1046": {"name": "手机无线网络推广", "parent": "417"},
+ "1047": {"name": "环保设备", "parent": "672"},
+ "1048": {"name": "通讯工程", "parent": "418"},
+ "1049": {"name": "半导体集成电路", "parent": "419"},
+ "1050": {"name": "航空服务", "parent": "636"},
+ "1051": {"name": "电机设备", "parent": "626"},
+ "1052": {"name": "档案软件", "parent": "414"},
+ "1053": {"name": "冷链物流服务", "parent": "634"},
+ "1054": {"name": "小吃服务", "parent": "656"},
+ "1055": {"name": "水产品加工", "parent": "593"},
+ "1056": {"name": "图书出版", "parent": "588"},
+ "1057": {"name": "固体废物治理", "parent": "672"},
+ "1059": {"name": "坚果食品", "parent": "593"},
+ "1060": {"name": "广告传媒", "parent": "579"},
+ "1061": {"name": "电梯", "parent": "622"},
+ "1062": {"name": "社区医疗与卫生院", "parent": "646"},
+ "1063": {"name": "广告、印刷包装", "parent": "630"},
+ "1064": {"name": "婚纱礼服", "parent": "662"},
+ "1065": {"name": "地毯", "parent": "602"},
+ "1066": {"name": "互联网物业", "parent": "551"},
+ "1067": {"name": "跨境电商", "parent": "3"},
+ "1068": {"name": "信息安全、系统集成", "parent": "9"},
+ "1069": {"name": "专用汽车制造", "parent": "750"},
+ "1070": {"name": "商品贸易", "parent": "3"},
+ "1071": {"name": "墙壁装饰材料", "parent": "746"},
+ "1072": {"name": "窗帘装饰材料", "parent": "746"},
+ "1073": {"name": "电子商务、本地生活服务", "parent": "3"},
+ "1075": {"name": "白酒电子商务", "parent": "3"},
+ "1076": {"name": "商品贸易、电子商务", "parent": "3"},
+ "1077": {"name": "木质装饰材料", "parent": "746"},
+ "1078": {"name": "电子商务、汽车电商交易平台", "parent": "3"},
+ "1079": {"name": "汽车轮胎", "parent": "751"},
+ "1080": {"name": "气体压缩机械制造", "parent": "732"},
+ "1081": {"name": "家装家具电子商务", "parent": "3"},
+ "1082": {"name": "化妆品电子商务", "parent": "3"},
+ "1083": {"name": "汽车销售", "parent": "749"},
+ "1084": {"name": "新闻资讯网站", "parent": "510"},
+ "1085": {"name": "母婴电商", "parent": "3"},
+ "1086": {"name": "电商商务、收藏品交易", "parent": "3"},
+ "1088": {"name": "电子商务、数码产品", "parent": "3"},
+ "1089": {"name": "二手车交易", "parent": "749"},
+ "1090": {"name": "游戏制作服务", "parent": "5"},
+ "1091": {"name": "母婴服务", "parent": "510"},
+ "1092": {"name": "家具电子商务", "parent": "3"},
+ "1093": {"name": "汽车配件电子商务", "parent": "3"},
+ "1094": {"name": "输配电设备", "parent": "777"},
+ "1095": {"name": "矿山设备", "parent": "727"},
+ "1096": {"name": "机床机械", "parent": "726"},
+ "1097": {"name": "农产品电商", "parent": "3"},
+ "1098": {"name": "陶瓷装饰材料", "parent": "746"},
+ "1099": {"name": "车载联网设备", "parent": "487"},
+ "1100": {"name": "汽车销售电子商务", "parent": "3"},
+ "1101": {"name": "石油设备", "parent": "730"},
+ "1102": {"name": "智能家居", "parent": "487"},
+ "1103": {"name": "散热器", "parent": "751"},
+ "1104": {"name": "电力工程", "parent": "775"},
+ "1105": {"name": "生鲜电商", "parent": "3"},
+ "1106": {"name": "互联网数据服务", "parent": "490"},
+ "1107": {"name": "房车、商务车销售", "parent": "749"},
+ "1108": {"name": "茶叶电子商务", "parent": "3"},
+ "1109": {"name": "酒类电子商务", "parent": "3"},
+ "1110": {"name": "阀门", "parent": "730"},
+ "1111": {"name": "食品电商", "parent": "3"},
+ "1112": {"name": "儿童摄影", "parent": "871"},
+ "1113": {"name": "广告摄影", "parent": "871"},
+ "1114": {"name": "婚纱摄影", "parent": "871"},
+ "1115": {"name": "模具制造", "parent": "620"},
+ "1116": {"name": "汽车模具", "parent": "629"},
+ "1117": {"name": "认证咨询", "parent": "567"},
+ "1118": {"name": "数字视觉制作服务", "parent": "590"},
+ "1119": {"name": "牙科及医疗器械", "parent": "646"},
+ "1120": {"name": "猎头招聘", "parent": "560"},
+ "1121": {"name": "家居", "parent": "518"},
+ "1122": {"name": "收藏品", "parent": "518"},
+ "1123": {"name": "首饰", "parent": "518"},
+ "1124": {"name": "工艺品", "parent": "518"},
+ "1125": {"name": "财务", "parent": "515"},
+ "1126": {"name": "税务", "parent": "515"},
+ "1127": {"name": "分类信息", "parent": "2"},
+ "1128": {"name": "宠物", "parent": "0"},
+ "1129": {"name": "快消品", "parent": "518"},
+ "1130": {"name": "人工智能", "parent": "2"},
+ "1131": {"name": "农/林/牧/渔", "parent": "0"},
}
+
def get_names(id):
id = str(id)
nms = []
d = TBL.get(id)
- if not d:return []
+ if not d:
+ return []
nms.append(d["name"])
p = get_names(d["parent"])
- if p: nms.extend(p)
+ if p:
+ nms.extend(p)
return nms
+
if __name__ == "__main__":
print(get_names("1119"))
diff --git a/deepdoc/parser/resume/entities/regions.py b/deepdoc/parser/resume/entities/regions.py
index e1707530b48f75f8bf7bb17565eb417748da1878..4c3f0d722fc1277aca1fcd6fe5842019a6a5dbf2 100644
--- a/deepdoc/parser/resume/entities/regions.py
+++ b/deepdoc/parser/resume/entities/regions.py
@@ -10,766 +10,776 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+import re
TBL = {
-"2":{"name":"北京","parent":"1"},
-"3":{"name":"天津","parent":"1"},
-"4":{"name":"河北","parent":"1"},
-"5":{"name":"山西","parent":"1"},
-"6":{"name":"内蒙古","parent":"1"},
-"7":{"name":"辽宁","parent":"1"},
-"8":{"name":"吉林","parent":"1"},
-"9":{"name":"黑龙江","parent":"1"},
-"10":{"name":"上海","parent":"1"},
-"11":{"name":"江苏","parent":"1"},
-"12":{"name":"浙江","parent":"1"},
-"13":{"name":"安徽","parent":"1"},
-"14":{"name":"福建","parent":"1"},
-"15":{"name":"江西","parent":"1"},
-"16":{"name":"山东","parent":"1"},
-"17":{"name":"河南","parent":"1"},
-"18":{"name":"湖北","parent":"1"},
-"19":{"name":"湖南","parent":"1"},
-"20":{"name":"广东","parent":"1"},
-"21":{"name":"广西","parent":"1"},
-"22":{"name":"海南","parent":"1"},
-"23":{"name":"重庆","parent":"1"},
-"24":{"name":"四川","parent":"1"},
-"25":{"name":"贵州","parent":"1"},
-"26":{"name":"云南","parent":"1"},
-"27":{"name":"西藏","parent":"1"},
-"28":{"name":"陕西","parent":"1"},
-"29":{"name":"甘肃","parent":"1"},
-"30":{"name":"青海","parent":"1"},
-"31":{"name":"宁夏","parent":"1"},
-"32":{"name":"新疆","parent":"1"},
-"33":{"name":"北京市","parent":"2"},
-"34":{"name":"天津市","parent":"3"},
-"35":{"name":"石家庄市","parent":"4"},
-"36":{"name":"唐山市","parent":"4"},
-"37":{"name":"秦皇岛市","parent":"4"},
-"38":{"name":"邯郸市","parent":"4"},
-"39":{"name":"邢台市","parent":"4"},
-"40":{"name":"保定市","parent":"4"},
-"41":{"name":"张家口市","parent":"4"},
-"42":{"name":"承德市","parent":"4"},
-"43":{"name":"沧州市","parent":"4"},
-"44":{"name":"廊坊市","parent":"4"},
-"45":{"name":"衡水市","parent":"4"},
-"46":{"name":"太原市","parent":"5"},
-"47":{"name":"大同市","parent":"5"},
-"48":{"name":"阳泉市","parent":"5"},
-"49":{"name":"长治市","parent":"5"},
-"50":{"name":"晋城市","parent":"5"},
-"51":{"name":"朔州市","parent":"5"},
-"52":{"name":"晋中市","parent":"5"},
-"53":{"name":"运城市","parent":"5"},
-"54":{"name":"忻州市","parent":"5"},
-"55":{"name":"临汾市","parent":"5"},
-"56":{"name":"吕梁市","parent":"5"},
-"57":{"name":"呼和浩特市","parent":"6"},
-"58":{"name":"包头市","parent":"6"},
-"59":{"name":"乌海市","parent":"6"},
-"60":{"name":"赤峰市","parent":"6"},
-"61":{"name":"通辽市","parent":"6"},
-"62":{"name":"鄂尔多斯市","parent":"6"},
-"63":{"name":"呼伦贝尔市","parent":"6"},
-"64":{"name":"巴彦淖尔市","parent":"6"},
-"65":{"name":"乌兰察布市","parent":"6"},
-"66":{"name":"兴安盟","parent":"6"},
-"67":{"name":"锡林郭勒盟","parent":"6"},
-"68":{"name":"阿拉善盟","parent":"6"},
-"69":{"name":"沈阳市","parent":"7"},
-"70":{"name":"大连市","parent":"7"},
-"71":{"name":"鞍山市","parent":"7"},
-"72":{"name":"抚顺市","parent":"7"},
-"73":{"name":"本溪市","parent":"7"},
-"74":{"name":"丹东市","parent":"7"},
-"75":{"name":"锦州市","parent":"7"},
-"76":{"name":"营口市","parent":"7"},
-"77":{"name":"阜新市","parent":"7"},
-"78":{"name":"辽阳市","parent":"7"},
-"79":{"name":"盘锦市","parent":"7"},
-"80":{"name":"铁岭市","parent":"7"},
-"81":{"name":"朝阳市","parent":"7"},
-"82":{"name":"葫芦岛市","parent":"7"},
-"83":{"name":"长春市","parent":"8"},
-"84":{"name":"吉林市","parent":"8"},
-"85":{"name":"四平市","parent":"8"},
-"86":{"name":"辽源市","parent":"8"},
-"87":{"name":"通化市","parent":"8"},
-"88":{"name":"白山市","parent":"8"},
-"89":{"name":"松原市","parent":"8"},
-"90":{"name":"白城市","parent":"8"},
-"91":{"name":"延边朝鲜族自治州","parent":"8"},
-"92":{"name":"哈尔滨市","parent":"9"},
-"93":{"name":"齐齐哈尔市","parent":"9"},
-"94":{"name":"鸡西市","parent":"9"},
-"95":{"name":"鹤岗市","parent":"9"},
-"96":{"name":"双鸭山市","parent":"9"},
-"97":{"name":"大庆市","parent":"9"},
-"98":{"name":"伊春市","parent":"9"},
-"99":{"name":"佳木斯市","parent":"9"},
-"100":{"name":"七台河市","parent":"9"},
-"101":{"name":"牡丹江市","parent":"9"},
-"102":{"name":"黑河市","parent":"9"},
-"103":{"name":"绥化市","parent":"9"},
-"104":{"name":"大兴安岭地区","parent":"9"},
-"105":{"name":"上海市","parent":"10"},
-"106":{"name":"南京市","parent":"11"},
-"107":{"name":"无锡市","parent":"11"},
-"108":{"name":"徐州市","parent":"11"},
-"109":{"name":"常州市","parent":"11"},
-"110":{"name":"苏州市","parent":"11"},
-"111":{"name":"南通市","parent":"11"},
-"112":{"name":"连云港市","parent":"11"},
-"113":{"name":"淮安市","parent":"11"},
-"114":{"name":"盐城市","parent":"11"},
-"115":{"name":"扬州市","parent":"11"},
-"116":{"name":"镇江市","parent":"11"},
-"117":{"name":"泰州市","parent":"11"},
-"118":{"name":"宿迁市","parent":"11"},
-"119":{"name":"杭州市","parent":"12"},
-"120":{"name":"宁波市","parent":"12"},
-"121":{"name":"温州市","parent":"12"},
-"122":{"name":"嘉兴市","parent":"12"},
-"123":{"name":"湖州市","parent":"12"},
-"124":{"name":"绍兴市","parent":"12"},
-"125":{"name":"金华市","parent":"12"},
-"126":{"name":"衢州市","parent":"12"},
-"127":{"name":"舟山市","parent":"12"},
-"128":{"name":"台州市","parent":"12"},
-"129":{"name":"丽水市","parent":"12"},
-"130":{"name":"合肥市","parent":"13"},
-"131":{"name":"芜湖市","parent":"13"},
-"132":{"name":"蚌埠市","parent":"13"},
-"133":{"name":"淮南市","parent":"13"},
-"134":{"name":"马鞍山市","parent":"13"},
-"135":{"name":"淮北市","parent":"13"},
-"136":{"name":"铜陵市","parent":"13"},
-"137":{"name":"安庆市","parent":"13"},
-"138":{"name":"黄山市","parent":"13"},
-"139":{"name":"滁州市","parent":"13"},
-"140":{"name":"阜阳市","parent":"13"},
-"141":{"name":"宿州市","parent":"13"},
-"143":{"name":"六安市","parent":"13"},
-"144":{"name":"亳州市","parent":"13"},
-"145":{"name":"池州市","parent":"13"},
-"146":{"name":"宣城市","parent":"13"},
-"147":{"name":"福州市","parent":"14"},
-"148":{"name":"厦门市","parent":"14"},
-"149":{"name":"莆田市","parent":"14"},
-"150":{"name":"三明市","parent":"14"},
-"151":{"name":"泉州市","parent":"14"},
-"152":{"name":"漳州市","parent":"14"},
-"153":{"name":"南平市","parent":"14"},
-"154":{"name":"龙岩市","parent":"14"},
-"155":{"name":"宁德市","parent":"14"},
-"156":{"name":"南昌市","parent":"15"},
-"157":{"name":"景德镇市","parent":"15"},
-"158":{"name":"萍乡市","parent":"15"},
-"159":{"name":"九江市","parent":"15"},
-"160":{"name":"新余市","parent":"15"},
-"161":{"name":"鹰潭市","parent":"15"},
-"162":{"name":"赣州市","parent":"15"},
-"163":{"name":"吉安市","parent":"15"},
-"164":{"name":"宜春市","parent":"15"},
-"165":{"name":"抚州市","parent":"15"},
-"166":{"name":"上饶市","parent":"15"},
-"167":{"name":"济南市","parent":"16"},
-"168":{"name":"青岛市","parent":"16"},
-"169":{"name":"淄博市","parent":"16"},
-"170":{"name":"枣庄市","parent":"16"},
-"171":{"name":"东营市","parent":"16"},
-"172":{"name":"烟台市","parent":"16"},
-"173":{"name":"潍坊市","parent":"16"},
-"174":{"name":"济宁市","parent":"16"},
-"175":{"name":"泰安市","parent":"16"},
-"176":{"name":"威海市","parent":"16"},
-"177":{"name":"日照市","parent":"16"},
-"179":{"name":"临沂市","parent":"16"},
-"180":{"name":"德州市","parent":"16"},
-"181":{"name":"聊城市","parent":"16"},
-"182":{"name":"滨州市","parent":"16"},
-"183":{"name":"菏泽市","parent":"16"},
-"184":{"name":"郑州市","parent":"17"},
-"185":{"name":"开封市","parent":"17"},
-"186":{"name":"洛阳市","parent":"17"},
-"187":{"name":"平顶山市","parent":"17"},
-"188":{"name":"安阳市","parent":"17"},
-"189":{"name":"鹤壁市","parent":"17"},
-"190":{"name":"新乡市","parent":"17"},
-"191":{"name":"焦作市","parent":"17"},
-"192":{"name":"濮阳市","parent":"17"},
-"193":{"name":"许昌市","parent":"17"},
-"194":{"name":"漯河市","parent":"17"},
-"195":{"name":"三门峡市","parent":"17"},
-"196":{"name":"南阳市","parent":"17"},
-"197":{"name":"商丘市","parent":"17"},
-"198":{"name":"信阳市","parent":"17"},
-"199":{"name":"周口市","parent":"17"},
-"200":{"name":"驻马店市","parent":"17"},
-"201":{"name":"武汉市","parent":"18"},
-"202":{"name":"黄石市","parent":"18"},
-"203":{"name":"十堰市","parent":"18"},
-"204":{"name":"宜昌市","parent":"18"},
-"205":{"name":"襄阳市","parent":"18"},
-"206":{"name":"鄂州市","parent":"18"},
-"207":{"name":"荆门市","parent":"18"},
-"208":{"name":"孝感市","parent":"18"},
-"209":{"name":"荆州市","parent":"18"},
-"210":{"name":"黄冈市","parent":"18"},
-"211":{"name":"咸宁市","parent":"18"},
-"212":{"name":"随州市","parent":"18"},
-"213":{"name":"恩施土家族苗族自治州","parent":"18"},
-"215":{"name":"长沙市","parent":"19"},
-"216":{"name":"株洲市","parent":"19"},
-"217":{"name":"湘潭市","parent":"19"},
-"218":{"name":"衡阳市","parent":"19"},
-"219":{"name":"邵阳市","parent":"19"},
-"220":{"name":"岳阳市","parent":"19"},
-"221":{"name":"常德市","parent":"19"},
-"222":{"name":"张家界市","parent":"19"},
-"223":{"name":"益阳市","parent":"19"},
-"224":{"name":"郴州市","parent":"19"},
-"225":{"name":"永州市","parent":"19"},
-"226":{"name":"怀化市","parent":"19"},
-"227":{"name":"娄底市","parent":"19"},
-"228":{"name":"湘西土家族苗族自治州","parent":"19"},
-"229":{"name":"广州市","parent":"20"},
-"230":{"name":"韶关市","parent":"20"},
-"231":{"name":"深圳市","parent":"20"},
-"232":{"name":"珠海市","parent":"20"},
-"233":{"name":"汕头市","parent":"20"},
-"234":{"name":"佛山市","parent":"20"},
-"235":{"name":"江门市","parent":"20"},
-"236":{"name":"湛江市","parent":"20"},
-"237":{"name":"茂名市","parent":"20"},
-"238":{"name":"肇庆市","parent":"20"},
-"239":{"name":"惠州市","parent":"20"},
-"240":{"name":"梅州市","parent":"20"},
-"241":{"name":"汕尾市","parent":"20"},
-"242":{"name":"河源市","parent":"20"},
-"243":{"name":"阳江市","parent":"20"},
-"244":{"name":"清远市","parent":"20"},
-"245":{"name":"东莞市","parent":"20"},
-"246":{"name":"中山市","parent":"20"},
-"247":{"name":"潮州市","parent":"20"},
-"248":{"name":"揭阳市","parent":"20"},
-"249":{"name":"云浮市","parent":"20"},
-"250":{"name":"南宁市","parent":"21"},
-"251":{"name":"柳州市","parent":"21"},
-"252":{"name":"桂林市","parent":"21"},
-"253":{"name":"梧州市","parent":"21"},
-"254":{"name":"北海市","parent":"21"},
-"255":{"name":"防城港市","parent":"21"},
-"256":{"name":"钦州市","parent":"21"},
-"257":{"name":"贵港市","parent":"21"},
-"258":{"name":"玉林市","parent":"21"},
-"259":{"name":"百色市","parent":"21"},
-"260":{"name":"贺州市","parent":"21"},
-"261":{"name":"河池市","parent":"21"},
-"262":{"name":"来宾市","parent":"21"},
-"263":{"name":"崇左市","parent":"21"},
-"264":{"name":"海口市","parent":"22"},
-"265":{"name":"三亚市","parent":"22"},
-"267":{"name":"重庆市","parent":"23"},
-"268":{"name":"成都市","parent":"24"},
-"269":{"name":"自贡市","parent":"24"},
-"270":{"name":"攀枝花市","parent":"24"},
-"271":{"name":"泸州市","parent":"24"},
-"272":{"name":"德阳市","parent":"24"},
-"273":{"name":"绵阳市","parent":"24"},
-"274":{"name":"广元市","parent":"24"},
-"275":{"name":"遂宁市","parent":"24"},
-"276":{"name":"内江市","parent":"24"},
-"277":{"name":"乐山市","parent":"24"},
-"278":{"name":"南充市","parent":"24"},
-"279":{"name":"眉山市","parent":"24"},
-"280":{"name":"宜宾市","parent":"24"},
-"281":{"name":"广安市","parent":"24"},
-"282":{"name":"达州市","parent":"24"},
-"283":{"name":"雅安市","parent":"24"},
-"284":{"name":"巴中市","parent":"24"},
-"285":{"name":"资阳市","parent":"24"},
-"286":{"name":"阿坝藏族羌族自治州","parent":"24"},
-"287":{"name":"甘孜藏族自治州","parent":"24"},
-"288":{"name":"凉山彝族自治州","parent":"24"},
-"289":{"name":"贵阳市","parent":"25"},
-"290":{"name":"六盘水市","parent":"25"},
-"291":{"name":"遵义市","parent":"25"},
-"292":{"name":"安顺市","parent":"25"},
-"293":{"name":"铜仁市","parent":"25"},
-"294":{"name":"黔西南布依族苗族自治州","parent":"25"},
-"295":{"name":"毕节市","parent":"25"},
-"296":{"name":"黔东南苗族侗族自治州","parent":"25"},
-"297":{"name":"黔南布依族苗族自治州","parent":"25"},
-"298":{"name":"昆明市","parent":"26"},
-"299":{"name":"曲靖市","parent":"26"},
-"300":{"name":"玉溪市","parent":"26"},
-"301":{"name":"保山市","parent":"26"},
-"302":{"name":"昭通市","parent":"26"},
-"303":{"name":"丽江市","parent":"26"},
-"304":{"name":"普洱市","parent":"26"},
-"305":{"name":"临沧市","parent":"26"},
-"306":{"name":"楚雄彝族自治州","parent":"26"},
-"307":{"name":"红河哈尼族彝族自治州","parent":"26"},
-"308":{"name":"文山壮族苗族自治州","parent":"26"},
-"309":{"name":"西双版纳傣族自治州","parent":"26"},
-"310":{"name":"大理白族自治州","parent":"26"},
-"311":{"name":"德宏傣族景颇族自治州","parent":"26"},
-"312":{"name":"怒江傈僳族自治州","parent":"26"},
-"313":{"name":"迪庆藏族自治州","parent":"26"},
-"314":{"name":"拉萨市","parent":"27"},
-"315":{"name":"昌都市","parent":"27"},
-"316":{"name":"山南市","parent":"27"},
-"317":{"name":"日喀则市","parent":"27"},
-"318":{"name":"那曲市","parent":"27"},
-"319":{"name":"阿里地区","parent":"27"},
-"320":{"name":"林芝市","parent":"27"},
-"321":{"name":"西安市","parent":"28"},
-"322":{"name":"铜川市","parent":"28"},
-"323":{"name":"宝鸡市","parent":"28"},
-"324":{"name":"咸阳市","parent":"28"},
-"325":{"name":"渭南市","parent":"28"},
-"326":{"name":"延安市","parent":"28"},
-"327":{"name":"汉中市","parent":"28"},
-"328":{"name":"榆林市","parent":"28"},
-"329":{"name":"安康市","parent":"28"},
-"330":{"name":"商洛市","parent":"28"},
-"331":{"name":"兰州市","parent":"29"},
-"332":{"name":"嘉峪关市","parent":"29"},
-"333":{"name":"金昌市","parent":"29"},
-"334":{"name":"白银市","parent":"29"},
-"335":{"name":"天水市","parent":"29"},
-"336":{"name":"武威市","parent":"29"},
-"337":{"name":"张掖市","parent":"29"},
-"338":{"name":"平凉市","parent":"29"},
-"339":{"name":"酒泉市","parent":"29"},
-"340":{"name":"庆阳市","parent":"29"},
-"341":{"name":"定西市","parent":"29"},
-"342":{"name":"陇南市","parent":"29"},
-"343":{"name":"临夏回族自治州","parent":"29"},
-"344":{"name":"甘南藏族自治州","parent":"29"},
-"345":{"name":"西宁市","parent":"30"},
-"346":{"name":"海东市","parent":"30"},
-"347":{"name":"海北藏族自治州","parent":"30"},
-"348":{"name":"黄南藏族自治州","parent":"30"},
-"349":{"name":"海南藏族自治州","parent":"30"},
-"350":{"name":"果洛藏族自治州","parent":"30"},
-"351":{"name":"玉树藏族自治州","parent":"30"},
-"352":{"name":"海西蒙古族藏族自治州","parent":"30"},
-"353":{"name":"银川市","parent":"31"},
-"354":{"name":"石嘴山市","parent":"31"},
-"355":{"name":"吴忠市","parent":"31"},
-"356":{"name":"固原市","parent":"31"},
-"357":{"name":"中卫市","parent":"31"},
-"358":{"name":"乌鲁木齐市","parent":"32"},
-"359":{"name":"克拉玛依市","parent":"32"},
-"360":{"name":"吐鲁番市","parent":"32"},
-"361":{"name":"哈密市","parent":"32"},
-"362":{"name":"昌吉回族自治州","parent":"32"},
-"363":{"name":"博尔塔拉蒙古自治州","parent":"32"},
-"364":{"name":"巴音郭楞蒙古自治州","parent":"32"},
-"365":{"name":"阿克苏地区","parent":"32"},
-"366":{"name":"克孜勒苏柯尔克孜自治州","parent":"32"},
-"367":{"name":"喀什地区","parent":"32"},
-"368":{"name":"和田地区","parent":"32"},
-"369":{"name":"伊犁哈萨克自治州","parent":"32"},
-"370":{"name":"塔城地区","parent":"32"},
-"371":{"name":"阿勒泰地区","parent":"32"},
-"372":{"name":"新疆省直辖行政单位","parent":"32"},
-"373":{"name":"可克达拉市","parent":"32"},
-"374":{"name":"昆玉市","parent":"32"},
-"375":{"name":"胡杨河市","parent":"32"},
-"376":{"name":"双河市","parent":"32"},
-"3560":{"name":"北票市","parent":"7"},
-"3615":{"name":"高州市","parent":"20"},
-"3651":{"name":"济源市","parent":"17"},
-"3662":{"name":"胶南市","parent":"16"},
-"3683":{"name":"老河口市","parent":"18"},
-"3758":{"name":"沙河市","parent":"4"},
-"3822":{"name":"宜城市","parent":"18"},
-"3842":{"name":"枣阳市","parent":"18"},
-"3850":{"name":"肇东市","parent":"9"},
-"3905":{"name":"澳门","parent":"1"},
-"3906":{"name":"澳门","parent":"3905"},
-"3907":{"name":"香港","parent":"1"},
-"3908":{"name":"香港","parent":"3907"},
-"3947":{"name":"仙桃市","parent":"18"},
-"3954":{"name":"台湾","parent":"1"},
-"3955":{"name":"台湾","parent":"3954"},
-"3956":{"name":"海外","parent":"1"},
-"3957":{"name":"海外","parent":"3956"},
-"3958":{"name":"美国","parent":"3956"},
-"3959":{"name":"加拿大","parent":"3956"},
-"3961":{"name":"日本","parent":"3956"},
-"3962":{"name":"韩国","parent":"3956"},
-"3963":{"name":"德国","parent":"3956"},
-"3964":{"name":"英国","parent":"3956"},
-"3965":{"name":"意大利","parent":"3956"},
-"3966":{"name":"西班牙","parent":"3956"},
-"3967":{"name":"法国","parent":"3956"},
-"3968":{"name":"澳大利亚","parent":"3956"},
-"3969":{"name":"东城区","parent":"2"},
-"3970":{"name":"西城区","parent":"2"},
-"3971":{"name":"崇文区","parent":"2"},
-"3972":{"name":"宣武区","parent":"2"},
-"3973":{"name":"朝阳区","parent":"2"},
-"3974":{"name":"海淀区","parent":"2"},
-"3975":{"name":"丰台区","parent":"2"},
-"3976":{"name":"石景山区","parent":"2"},
-"3977":{"name":"门头沟区","parent":"2"},
-"3978":{"name":"房山区","parent":"2"},
-"3979":{"name":"通州区","parent":"2"},
-"3980":{"name":"顺义区","parent":"2"},
-"3981":{"name":"昌平区","parent":"2"},
-"3982":{"name":"大兴区","parent":"2"},
-"3983":{"name":"平谷区","parent":"2"},
-"3984":{"name":"怀柔区","parent":"2"},
-"3985":{"name":"密云区","parent":"2"},
-"3986":{"name":"延庆区","parent":"2"},
-"3987":{"name":"黄浦区","parent":"10"},
-"3988":{"name":"徐汇区","parent":"10"},
-"3989":{"name":"长宁区","parent":"10"},
-"3990":{"name":"静安区","parent":"10"},
-"3991":{"name":"普陀区","parent":"10"},
-"3992":{"name":"闸北区","parent":"10"},
-"3993":{"name":"虹口区","parent":"10"},
-"3994":{"name":"杨浦区","parent":"10"},
-"3995":{"name":"宝山区","parent":"10"},
-"3996":{"name":"闵行区","parent":"10"},
-"3997":{"name":"嘉定区","parent":"10"},
-"3998":{"name":"浦东新区","parent":"10"},
-"3999":{"name":"松江区","parent":"10"},
-"4000":{"name":"金山区","parent":"10"},
-"4001":{"name":"青浦区","parent":"10"},
-"4002":{"name":"奉贤区","parent":"10"},
-"4003":{"name":"崇明区","parent":"10"},
-"4004":{"name":"和平区","parent":"3"},
-"4005":{"name":"河东区","parent":"3"},
-"4006":{"name":"河西区","parent":"3"},
-"4007":{"name":"南开区","parent":"3"},
-"4008":{"name":"红桥区","parent":"3"},
-"4009":{"name":"河北区","parent":"3"},
-"4010":{"name":"滨海新区","parent":"3"},
-"4011":{"name":"东丽区","parent":"3"},
-"4012":{"name":"西青区","parent":"3"},
-"4013":{"name":"北辰区","parent":"3"},
-"4014":{"name":"津南区","parent":"3"},
-"4015":{"name":"武清区","parent":"3"},
-"4016":{"name":"宝坻区","parent":"3"},
-"4017":{"name":"静海区","parent":"3"},
-"4018":{"name":"宁河区","parent":"3"},
-"4019":{"name":"蓟州区","parent":"3"},
-"4020":{"name":"渝中区","parent":"23"},
-"4021":{"name":"江北区","parent":"23"},
-"4022":{"name":"南岸区","parent":"23"},
-"4023":{"name":"沙坪坝区","parent":"23"},
-"4024":{"name":"九龙坡区","parent":"23"},
-"4025":{"name":"大渡口区","parent":"23"},
-"4026":{"name":"渝北区","parent":"23"},
-"4027":{"name":"巴南区","parent":"23"},
-"4028":{"name":"北碚区","parent":"23"},
-"4029":{"name":"万州区","parent":"23"},
-"4030":{"name":"黔江区","parent":"23"},
-"4031":{"name":"永川区","parent":"23"},
-"4032":{"name":"涪陵区","parent":"23"},
-"4033":{"name":"江津区","parent":"23"},
-"4034":{"name":"合川区","parent":"23"},
-"4035":{"name":"双桥区","parent":"23"},
-"4036":{"name":"万盛区","parent":"23"},
-"4037":{"name":"荣昌区","parent":"23"},
-"4038":{"name":"大足区","parent":"23"},
-"4039":{"name":"璧山区","parent":"23"},
-"4040":{"name":"铜梁区","parent":"23"},
-"4041":{"name":"潼南区","parent":"23"},
-"4042":{"name":"綦江区","parent":"23"},
-"4043":{"name":"忠县","parent":"23"},
-"4044":{"name":"开州区","parent":"23"},
-"4045":{"name":"云阳县","parent":"23"},
-"4046":{"name":"梁平区","parent":"23"},
-"4047":{"name":"垫江县","parent":"23"},
-"4048":{"name":"丰都县","parent":"23"},
-"4049":{"name":"奉节县","parent":"23"},
-"4050":{"name":"巫山县","parent":"23"},
-"4051":{"name":"巫溪县","parent":"23"},
-"4052":{"name":"城口县","parent":"23"},
-"4053":{"name":"武隆区","parent":"23"},
-"4054":{"name":"石柱土家族自治县","parent":"23"},
-"4055":{"name":"秀山土家族苗族自治县","parent":"23"},
-"4056":{"name":"酉阳土家族苗族自治县","parent":"23"},
-"4057":{"name":"彭水苗族土家族自治县","parent":"23"},
-"4058":{"name":"潜江市","parent":"18"},
-"4059":{"name":"三沙市","parent":"22"},
-"4060":{"name":"石河子市","parent":"32"},
-"4061":{"name":"阿拉尔市","parent":"32"},
-"4062":{"name":"图木舒克市","parent":"32"},
-"4063":{"name":"五家渠市","parent":"32"},
-"4064":{"name":"北屯市","parent":"32"},
-"4065":{"name":"铁门关市","parent":"32"},
-"4066":{"name":"儋州市","parent":"22"},
-"4067":{"name":"五指山市","parent":"22"},
-"4068":{"name":"文昌市","parent":"22"},
-"4069":{"name":"琼海市","parent":"22"},
-"4070":{"name":"万宁市","parent":"22"},
-"4072":{"name":"定安县","parent":"22"},
-"4073":{"name":"屯昌县","parent":"22"},
-"4074":{"name":"澄迈县","parent":"22"},
-"4075":{"name":"临高县","parent":"22"},
-"4076":{"name":"琼中黎族苗族自治县","parent":"22"},
-"4077":{"name":"保亭黎族苗族自治县","parent":"22"},
-"4078":{"name":"白沙黎族自治县","parent":"22"},
-"4079":{"name":"昌江黎族自治县","parent":"22"},
-"4080":{"name":"乐东黎族自治县","parent":"22"},
-"4081":{"name":"陵水黎族自治县","parent":"22"},
-"4082":{"name":"马来西亚","parent":"3956"},
-"6047":{"name":"长寿区","parent":"23"},
-"6857":{"name":"阿富汗","parent":"3956"},
-"6858":{"name":"阿尔巴尼亚","parent":"3956"},
-"6859":{"name":"阿尔及利亚","parent":"3956"},
-"6860":{"name":"美属萨摩亚","parent":"3956"},
-"6861":{"name":"安道尔","parent":"3956"},
-"6862":{"name":"安哥拉","parent":"3956"},
-"6863":{"name":"安圭拉","parent":"3956"},
-"6864":{"name":"南极洲","parent":"3956"},
-"6865":{"name":"安提瓜和巴布达","parent":"3956"},
-"6866":{"name":"阿根廷","parent":"3956"},
-"6867":{"name":"亚美尼亚","parent":"3956"},
-"6869":{"name":"奥地利","parent":"3956"},
-"6870":{"name":"阿塞拜疆","parent":"3956"},
-"6871":{"name":"巴哈马","parent":"3956"},
-"6872":{"name":"巴林","parent":"3956"},
-"6873":{"name":"孟加拉国","parent":"3956"},
-"6874":{"name":"巴巴多斯","parent":"3956"},
-"6875":{"name":"白俄罗斯","parent":"3956"},
-"6876":{"name":"比利时","parent":"3956"},
-"6877":{"name":"伯利兹","parent":"3956"},
-"6878":{"name":"贝宁","parent":"3956"},
-"6879":{"name":"百慕大","parent":"3956"},
-"6880":{"name":"不丹","parent":"3956"},
-"6881":{"name":"玻利维亚","parent":"3956"},
-"6882":{"name":"波黑","parent":"3956"},
-"6883":{"name":"博茨瓦纳","parent":"3956"},
-"6884":{"name":"布维岛","parent":"3956"},
-"6885":{"name":"巴西","parent":"3956"},
-"6886":{"name":"英属印度洋领土","parent":"3956"},
-"6887":{"name":"文莱","parent":"3956"},
-"6888":{"name":"保加利亚","parent":"3956"},
-"6889":{"name":"布基纳法索","parent":"3956"},
-"6890":{"name":"布隆迪","parent":"3956"},
-"6891":{"name":"柬埔寨","parent":"3956"},
-"6892":{"name":"喀麦隆","parent":"3956"},
-"6893":{"name":"佛得角","parent":"3956"},
-"6894":{"name":"开曼群岛","parent":"3956"},
-"6895":{"name":"中非","parent":"3956"},
-"6896":{"name":"乍得","parent":"3956"},
-"6897":{"name":"智利","parent":"3956"},
-"6898":{"name":"圣诞岛","parent":"3956"},
-"6899":{"name":"科科斯(基林)群岛","parent":"3956"},
-"6900":{"name":"哥伦比亚","parent":"3956"},
-"6901":{"name":"科摩罗","parent":"3956"},
-"6902":{"name":"刚果(布)","parent":"3956"},
-"6903":{"name":"刚果(金)","parent":"3956"},
-"6904":{"name":"库克群岛","parent":"3956"},
-"6905":{"name":"哥斯达黎加","parent":"3956"},
-"6906":{"name":"科特迪瓦","parent":"3956"},
-"6907":{"name":"克罗地亚","parent":"3956"},
-"6908":{"name":"古巴","parent":"3956"},
-"6909":{"name":"塞浦路斯","parent":"3956"},
-"6910":{"name":"捷克","parent":"3956"},
-"6911":{"name":"丹麦","parent":"3956"},
-"6912":{"name":"吉布提","parent":"3956"},
-"6913":{"name":"多米尼克","parent":"3956"},
-"6914":{"name":"多米尼加共和国","parent":"3956"},
-"6915":{"name":"东帝汶","parent":"3956"},
-"6916":{"name":"厄瓜多尔","parent":"3956"},
-"6917":{"name":"埃及","parent":"3956"},
-"6918":{"name":"萨尔瓦多","parent":"3956"},
-"6919":{"name":"赤道几内亚","parent":"3956"},
-"6920":{"name":"厄立特里亚","parent":"3956"},
-"6921":{"name":"爱沙尼亚","parent":"3956"},
-"6922":{"name":"埃塞俄比亚","parent":"3956"},
-"6923":{"name":"福克兰群岛(马尔维纳斯)","parent":"3956"},
-"6924":{"name":"法罗群岛","parent":"3956"},
-"6925":{"name":"斐济","parent":"3956"},
-"6926":{"name":"芬兰","parent":"3956"},
-"6927":{"name":"法属圭亚那","parent":"3956"},
-"6928":{"name":"法属波利尼西亚","parent":"3956"},
-"6929":{"name":"法属南部领土","parent":"3956"},
-"6930":{"name":"加蓬","parent":"3956"},
-"6931":{"name":"冈比亚","parent":"3956"},
-"6932":{"name":"格鲁吉亚","parent":"3956"},
-"6933":{"name":"加纳","parent":"3956"},
-"6934":{"name":"直布罗陀","parent":"3956"},
-"6935":{"name":"希腊","parent":"3956"},
-"6936":{"name":"格陵兰","parent":"3956"},
-"6937":{"name":"格林纳达","parent":"3956"},
-"6938":{"name":"瓜德罗普","parent":"3956"},
-"6939":{"name":"关岛","parent":"3956"},
-"6940":{"name":"危地马拉","parent":"3956"},
-"6941":{"name":"几内亚","parent":"3956"},
-"6942":{"name":"几内亚比绍","parent":"3956"},
-"6943":{"name":"圭亚那","parent":"3956"},
-"6944":{"name":"海地","parent":"3956"},
-"6945":{"name":"赫德岛和麦克唐纳岛","parent":"3956"},
-"6946":{"name":"洪都拉斯","parent":"3956"},
-"6947":{"name":"匈牙利","parent":"3956"},
-"6948":{"name":"冰岛","parent":"3956"},
-"6949":{"name":"印度","parent":"3956"},
-"6950":{"name":"印度尼西亚","parent":"3956"},
-"6951":{"name":"伊朗","parent":"3956"},
-"6952":{"name":"伊拉克","parent":"3956"},
-"6953":{"name":"爱尔兰","parent":"3956"},
-"6954":{"name":"以色列","parent":"3956"},
-"6955":{"name":"牙买加","parent":"3956"},
-"6956":{"name":"约旦","parent":"3956"},
-"6957":{"name":"哈萨克斯坦","parent":"3956"},
-"6958":{"name":"肯尼亚","parent":"3956"},
-"6959":{"name":"基里巴斯","parent":"3956"},
-"6960":{"name":"朝鲜","parent":"3956"},
-"6961":{"name":"科威特","parent":"3956"},
-"6962":{"name":"吉尔吉斯斯坦","parent":"3956"},
-"6963":{"name":"老挝","parent":"3956"},
-"6964":{"name":"拉脱维亚","parent":"3956"},
-"6965":{"name":"黎巴嫩","parent":"3956"},
-"6966":{"name":"莱索托","parent":"3956"},
-"6967":{"name":"利比里亚","parent":"3956"},
-"6968":{"name":"利比亚","parent":"3956"},
-"6969":{"name":"列支敦士登","parent":"3956"},
-"6970":{"name":"立陶宛","parent":"3956"},
-"6971":{"name":"卢森堡","parent":"3956"},
-"6972":{"name":"前南马其顿","parent":"3956"},
-"6973":{"name":"马达加斯加","parent":"3956"},
-"6974":{"name":"马拉维","parent":"3956"},
-"6975":{"name":"马尔代夫","parent":"3956"},
-"6976":{"name":"马里","parent":"3956"},
-"6977":{"name":"马耳他","parent":"3956"},
-"6978":{"name":"马绍尔群岛","parent":"3956"},
-"6979":{"name":"马提尼克","parent":"3956"},
-"6980":{"name":"毛里塔尼亚","parent":"3956"},
-"6981":{"name":"毛里求斯","parent":"3956"},
-"6982":{"name":"马约特","parent":"3956"},
-"6983":{"name":"墨西哥","parent":"3956"},
-"6984":{"name":"密克罗尼西亚联邦","parent":"3956"},
-"6985":{"name":"摩尔多瓦","parent":"3956"},
-"6986":{"name":"摩纳哥","parent":"3956"},
-"6987":{"name":"蒙古","parent":"3956"},
-"6988":{"name":"蒙特塞拉特","parent":"3956"},
-"6989":{"name":"摩洛哥","parent":"3956"},
-"6990":{"name":"莫桑比克","parent":"3956"},
-"6991":{"name":"缅甸","parent":"3956"},
-"6992":{"name":"纳米比亚","parent":"3956"},
-"6993":{"name":"瑙鲁","parent":"3956"},
-"6994":{"name":"尼泊尔","parent":"3956"},
-"6995":{"name":"荷兰","parent":"3956"},
-"6996":{"name":"荷属安的列斯","parent":"3956"},
-"6997":{"name":"新喀里多尼亚","parent":"3956"},
-"6998":{"name":"新西兰","parent":"3956"},
-"6999":{"name":"尼加拉瓜","parent":"3956"},
-"7000":{"name":"尼日尔","parent":"3956"},
-"7001":{"name":"尼日利亚","parent":"3956"},
-"7002":{"name":"纽埃","parent":"3956"},
-"7003":{"name":"诺福克岛","parent":"3956"},
-"7004":{"name":"北马里亚纳","parent":"3956"},
-"7005":{"name":"挪威","parent":"3956"},
-"7006":{"name":"阿曼","parent":"3956"},
-"7007":{"name":"巴基斯坦","parent":"3956"},
-"7008":{"name":"帕劳","parent":"3956"},
-"7009":{"name":"巴勒斯坦","parent":"3956"},
-"7010":{"name":"巴拿马","parent":"3956"},
-"7011":{"name":"巴布亚新几内亚","parent":"3956"},
-"7012":{"name":"巴拉圭","parent":"3956"},
-"7013":{"name":"秘鲁","parent":"3956"},
-"7014":{"name":"菲律宾","parent":"3956"},
-"7015":{"name":"皮特凯恩群岛","parent":"3956"},
-"7016":{"name":"波兰","parent":"3956"},
-"7017":{"name":"葡萄牙","parent":"3956"},
-"7018":{"name":"波多黎各","parent":"3956"},
-"7019":{"name":"卡塔尔","parent":"3956"},
-"7020":{"name":"留尼汪","parent":"3956"},
-"7021":{"name":"罗马尼亚","parent":"3956"},
-"7022":{"name":"俄罗斯联邦","parent":"3956"},
-"7023":{"name":"卢旺达","parent":"3956"},
-"7024":{"name":"圣赫勒拿","parent":"3956"},
-"7025":{"name":"圣基茨和尼维斯","parent":"3956"},
-"7026":{"name":"圣卢西亚","parent":"3956"},
-"7027":{"name":"圣皮埃尔和密克隆","parent":"3956"},
-"7028":{"name":"圣文森特和格林纳丁斯","parent":"3956"},
-"7029":{"name":"萨摩亚","parent":"3956"},
-"7030":{"name":"圣马力诺","parent":"3956"},
-"7031":{"name":"圣多美和普林西比","parent":"3956"},
-"7032":{"name":"沙特阿拉伯","parent":"3956"},
-"7033":{"name":"塞内加尔","parent":"3956"},
-"7034":{"name":"塞舌尔","parent":"3956"},
-"7035":{"name":"塞拉利昂","parent":"3956"},
-"7036":{"name":"新加坡","parent":"3956"},
-"7037":{"name":"斯洛伐克","parent":"3956"},
-"7038":{"name":"斯洛文尼亚","parent":"3956"},
-"7039":{"name":"所罗门群岛","parent":"3956"},
-"7040":{"name":"索马里","parent":"3956"},
-"7041":{"name":"南非","parent":"3956"},
-"7042":{"name":"南乔治亚岛和南桑德韦奇岛","parent":"3956"},
-"7043":{"name":"斯里兰卡","parent":"3956"},
-"7044":{"name":"苏丹","parent":"3956"},
-"7045":{"name":"苏里南","parent":"3956"},
-"7046":{"name":"斯瓦尔巴群岛","parent":"3956"},
-"7047":{"name":"斯威士兰","parent":"3956"},
-"7048":{"name":"瑞典","parent":"3956"},
-"7049":{"name":"瑞士","parent":"3956"},
-"7050":{"name":"叙利亚","parent":"3956"},
-"7051":{"name":"塔吉克斯坦","parent":"3956"},
-"7052":{"name":"坦桑尼亚","parent":"3956"},
-"7053":{"name":"泰国","parent":"3956"},
-"7054":{"name":"多哥","parent":"3956"},
-"7055":{"name":"托克劳","parent":"3956"},
-"7056":{"name":"汤加","parent":"3956"},
-"7057":{"name":"特立尼达和多巴哥","parent":"3956"},
-"7058":{"name":"突尼斯","parent":"3956"},
-"7059":{"name":"土耳其","parent":"3956"},
-"7060":{"name":"土库曼斯坦","parent":"3956"},
-"7061":{"name":"特克斯科斯群岛","parent":"3956"},
-"7062":{"name":"图瓦卢","parent":"3956"},
-"7063":{"name":"乌干达","parent":"3956"},
-"7064":{"name":"乌克兰","parent":"3956"},
-"7065":{"name":"阿联酋","parent":"3956"},
-"7066":{"name":"美国本土外小岛屿","parent":"3956"},
-"7067":{"name":"乌拉圭","parent":"3956"},
-"7068":{"name":"乌兹别克斯坦","parent":"3956"},
-"7069":{"name":"瓦努阿图","parent":"3956"},
-"7070":{"name":"梵蒂冈","parent":"3956"},
-"7071":{"name":"委内瑞拉","parent":"3956"},
-"7072":{"name":"越南","parent":"3956"},
-"7073":{"name":"英属维尔京群岛","parent":"3956"},
-"7074":{"name":"美属维尔京群岛","parent":"3956"},
-"7075":{"name":"瓦利斯和富图纳","parent":"3956"},
-"7076":{"name":"西撒哈拉","parent":"3956"},
-"7077":{"name":"也门","parent":"3956"},
-"7078":{"name":"南斯拉夫","parent":"3956"},
-"7079":{"name":"赞比亚","parent":"3956"},
-"7080":{"name":"津巴布韦","parent":"3956"},
-"7081":{"name":"塞尔维亚","parent":"3956"},
-"7082":{"name":"雄安新区","parent":"4"},
-"7084":{"name":"天门市","parent":"18"}
+ "2": {"name": "北京", "parent": "1"},
+ "3": {"name": "天津", "parent": "1"},
+ "4": {"name": "河北", "parent": "1"},
+ "5": {"name": "山西", "parent": "1"},
+ "6": {"name": "内蒙古", "parent": "1"},
+ "7": {"name": "辽宁", "parent": "1"},
+ "8": {"name": "吉林", "parent": "1"},
+ "9": {"name": "黑龙江", "parent": "1"},
+ "10": {"name": "上海", "parent": "1"},
+ "11": {"name": "江苏", "parent": "1"},
+ "12": {"name": "浙江", "parent": "1"},
+ "13": {"name": "安徽", "parent": "1"},
+ "14": {"name": "福建", "parent": "1"},
+ "15": {"name": "江西", "parent": "1"},
+ "16": {"name": "山东", "parent": "1"},
+ "17": {"name": "河南", "parent": "1"},
+ "18": {"name": "湖北", "parent": "1"},
+ "19": {"name": "湖南", "parent": "1"},
+ "20": {"name": "广东", "parent": "1"},
+ "21": {"name": "广西", "parent": "1"},
+ "22": {"name": "海南", "parent": "1"},
+ "23": {"name": "重庆", "parent": "1"},
+ "24": {"name": "四川", "parent": "1"},
+ "25": {"name": "贵州", "parent": "1"},
+ "26": {"name": "云南", "parent": "1"},
+ "27": {"name": "西藏", "parent": "1"},
+ "28": {"name": "陕西", "parent": "1"},
+ "29": {"name": "甘肃", "parent": "1"},
+ "30": {"name": "青海", "parent": "1"},
+ "31": {"name": "宁夏", "parent": "1"},
+ "32": {"name": "新疆", "parent": "1"},
+ "33": {"name": "北京市", "parent": "2"},
+ "34": {"name": "天津市", "parent": "3"},
+ "35": {"name": "石家庄市", "parent": "4"},
+ "36": {"name": "唐山市", "parent": "4"},
+ "37": {"name": "秦皇岛市", "parent": "4"},
+ "38": {"name": "邯郸市", "parent": "4"},
+ "39": {"name": "邢台市", "parent": "4"},
+ "40": {"name": "保定市", "parent": "4"},
+ "41": {"name": "张家口市", "parent": "4"},
+ "42": {"name": "承德市", "parent": "4"},
+ "43": {"name": "沧州市", "parent": "4"},
+ "44": {"name": "廊坊市", "parent": "4"},
+ "45": {"name": "衡水市", "parent": "4"},
+ "46": {"name": "太原市", "parent": "5"},
+ "47": {"name": "大同市", "parent": "5"},
+ "48": {"name": "阳泉市", "parent": "5"},
+ "49": {"name": "长治市", "parent": "5"},
+ "50": {"name": "晋城市", "parent": "5"},
+ "51": {"name": "朔州市", "parent": "5"},
+ "52": {"name": "晋中市", "parent": "5"},
+ "53": {"name": "运城市", "parent": "5"},
+ "54": {"name": "忻州市", "parent": "5"},
+ "55": {"name": "临汾市", "parent": "5"},
+ "56": {"name": "吕梁市", "parent": "5"},
+ "57": {"name": "呼和浩特市", "parent": "6"},
+ "58": {"name": "包头市", "parent": "6"},
+ "59": {"name": "乌海市", "parent": "6"},
+ "60": {"name": "赤峰市", "parent": "6"},
+ "61": {"name": "通辽市", "parent": "6"},
+ "62": {"name": "鄂尔多斯市", "parent": "6"},
+ "63": {"name": "呼伦贝尔市", "parent": "6"},
+ "64": {"name": "巴彦淖尔市", "parent": "6"},
+ "65": {"name": "乌兰察布市", "parent": "6"},
+ "66": {"name": "兴安盟", "parent": "6"},
+ "67": {"name": "锡林郭勒盟", "parent": "6"},
+ "68": {"name": "阿拉善盟", "parent": "6"},
+ "69": {"name": "沈阳市", "parent": "7"},
+ "70": {"name": "大连市", "parent": "7"},
+ "71": {"name": "鞍山市", "parent": "7"},
+ "72": {"name": "抚顺市", "parent": "7"},
+ "73": {"name": "本溪市", "parent": "7"},
+ "74": {"name": "丹东市", "parent": "7"},
+ "75": {"name": "锦州市", "parent": "7"},
+ "76": {"name": "营口市", "parent": "7"},
+ "77": {"name": "阜新市", "parent": "7"},
+ "78": {"name": "辽阳市", "parent": "7"},
+ "79": {"name": "盘锦市", "parent": "7"},
+ "80": {"name": "铁岭市", "parent": "7"},
+ "81": {"name": "朝阳市", "parent": "7"},
+ "82": {"name": "葫芦岛市", "parent": "7"},
+ "83": {"name": "长春市", "parent": "8"},
+ "84": {"name": "吉林市", "parent": "8"},
+ "85": {"name": "四平市", "parent": "8"},
+ "86": {"name": "辽源市", "parent": "8"},
+ "87": {"name": "通化市", "parent": "8"},
+ "88": {"name": "白山市", "parent": "8"},
+ "89": {"name": "松原市", "parent": "8"},
+ "90": {"name": "白城市", "parent": "8"},
+ "91": {"name": "延边朝鲜族自治州", "parent": "8"},
+ "92": {"name": "哈尔滨市", "parent": "9"},
+ "93": {"name": "齐齐哈尔市", "parent": "9"},
+ "94": {"name": "鸡西市", "parent": "9"},
+ "95": {"name": "鹤岗市", "parent": "9"},
+ "96": {"name": "双鸭山市", "parent": "9"},
+ "97": {"name": "大庆市", "parent": "9"},
+ "98": {"name": "伊春市", "parent": "9"},
+ "99": {"name": "佳木斯市", "parent": "9"},
+ "100": {"name": "七台河市", "parent": "9"},
+ "101": {"name": "牡丹江市", "parent": "9"},
+ "102": {"name": "黑河市", "parent": "9"},
+ "103": {"name": "绥化市", "parent": "9"},
+ "104": {"name": "大兴安岭地区", "parent": "9"},
+ "105": {"name": "上海市", "parent": "10"},
+ "106": {"name": "南京市", "parent": "11"},
+ "107": {"name": "无锡市", "parent": "11"},
+ "108": {"name": "徐州市", "parent": "11"},
+ "109": {"name": "常州市", "parent": "11"},
+ "110": {"name": "苏州市", "parent": "11"},
+ "111": {"name": "南通市", "parent": "11"},
+ "112": {"name": "连云港市", "parent": "11"},
+ "113": {"name": "淮安市", "parent": "11"},
+ "114": {"name": "盐城市", "parent": "11"},
+ "115": {"name": "扬州市", "parent": "11"},
+ "116": {"name": "镇江市", "parent": "11"},
+ "117": {"name": "泰州市", "parent": "11"},
+ "118": {"name": "宿迁市", "parent": "11"},
+ "119": {"name": "杭州市", "parent": "12"},
+ "120": {"name": "宁波市", "parent": "12"},
+ "121": {"name": "温州市", "parent": "12"},
+ "122": {"name": "嘉兴市", "parent": "12"},
+ "123": {"name": "湖州市", "parent": "12"},
+ "124": {"name": "绍兴市", "parent": "12"},
+ "125": {"name": "金华市", "parent": "12"},
+ "126": {"name": "衢州市", "parent": "12"},
+ "127": {"name": "舟山市", "parent": "12"},
+ "128": {"name": "台州市", "parent": "12"},
+ "129": {"name": "丽水市", "parent": "12"},
+ "130": {"name": "合肥市", "parent": "13"},
+ "131": {"name": "芜湖市", "parent": "13"},
+ "132": {"name": "蚌埠市", "parent": "13"},
+ "133": {"name": "淮南市", "parent": "13"},
+ "134": {"name": "马鞍山市", "parent": "13"},
+ "135": {"name": "淮北市", "parent": "13"},
+ "136": {"name": "铜陵市", "parent": "13"},
+ "137": {"name": "安庆市", "parent": "13"},
+ "138": {"name": "黄山市", "parent": "13"},
+ "139": {"name": "滁州市", "parent": "13"},
+ "140": {"name": "阜阳市", "parent": "13"},
+ "141": {"name": "宿州市", "parent": "13"},
+ "143": {"name": "六安市", "parent": "13"},
+ "144": {"name": "亳州市", "parent": "13"},
+ "145": {"name": "池州市", "parent": "13"},
+ "146": {"name": "宣城市", "parent": "13"},
+ "147": {"name": "福州市", "parent": "14"},
+ "148": {"name": "厦门市", "parent": "14"},
+ "149": {"name": "莆田市", "parent": "14"},
+ "150": {"name": "三明市", "parent": "14"},
+ "151": {"name": "泉州市", "parent": "14"},
+ "152": {"name": "漳州市", "parent": "14"},
+ "153": {"name": "南平市", "parent": "14"},
+ "154": {"name": "龙岩市", "parent": "14"},
+ "155": {"name": "宁德市", "parent": "14"},
+ "156": {"name": "南昌市", "parent": "15"},
+ "157": {"name": "景德镇市", "parent": "15"},
+ "158": {"name": "萍乡市", "parent": "15"},
+ "159": {"name": "九江市", "parent": "15"},
+ "160": {"name": "新余市", "parent": "15"},
+ "161": {"name": "鹰潭市", "parent": "15"},
+ "162": {"name": "赣州市", "parent": "15"},
+ "163": {"name": "吉安市", "parent": "15"},
+ "164": {"name": "宜春市", "parent": "15"},
+ "165": {"name": "抚州市", "parent": "15"},
+ "166": {"name": "上饶市", "parent": "15"},
+ "167": {"name": "济南市", "parent": "16"},
+ "168": {"name": "青岛市", "parent": "16"},
+ "169": {"name": "淄博市", "parent": "16"},
+ "170": {"name": "枣庄市", "parent": "16"},
+ "171": {"name": "东营市", "parent": "16"},
+ "172": {"name": "烟台市", "parent": "16"},
+ "173": {"name": "潍坊市", "parent": "16"},
+ "174": {"name": "济宁市", "parent": "16"},
+ "175": {"name": "泰安市", "parent": "16"},
+ "176": {"name": "威海市", "parent": "16"},
+ "177": {"name": "日照市", "parent": "16"},
+ "179": {"name": "临沂市", "parent": "16"},
+ "180": {"name": "德州市", "parent": "16"},
+ "181": {"name": "聊城市", "parent": "16"},
+ "182": {"name": "滨州市", "parent": "16"},
+ "183": {"name": "菏泽市", "parent": "16"},
+ "184": {"name": "郑州市", "parent": "17"},
+ "185": {"name": "开封市", "parent": "17"},
+ "186": {"name": "洛阳市", "parent": "17"},
+ "187": {"name": "平顶山市", "parent": "17"},
+ "188": {"name": "安阳市", "parent": "17"},
+ "189": {"name": "鹤壁市", "parent": "17"},
+ "190": {"name": "新乡市", "parent": "17"},
+ "191": {"name": "焦作市", "parent": "17"},
+ "192": {"name": "濮阳市", "parent": "17"},
+ "193": {"name": "许昌市", "parent": "17"},
+ "194": {"name": "漯河市", "parent": "17"},
+ "195": {"name": "三门峡市", "parent": "17"},
+ "196": {"name": "南阳市", "parent": "17"},
+ "197": {"name": "商丘市", "parent": "17"},
+ "198": {"name": "信阳市", "parent": "17"},
+ "199": {"name": "周口市", "parent": "17"},
+ "200": {"name": "驻马店市", "parent": "17"},
+ "201": {"name": "武汉市", "parent": "18"},
+ "202": {"name": "黄石市", "parent": "18"},
+ "203": {"name": "十堰市", "parent": "18"},
+ "204": {"name": "宜昌市", "parent": "18"},
+ "205": {"name": "襄阳市", "parent": "18"},
+ "206": {"name": "鄂州市", "parent": "18"},
+ "207": {"name": "荆门市", "parent": "18"},
+ "208": {"name": "孝感市", "parent": "18"},
+ "209": {"name": "荆州市", "parent": "18"},
+ "210": {"name": "黄冈市", "parent": "18"},
+ "211": {"name": "咸宁市", "parent": "18"},
+ "212": {"name": "随州市", "parent": "18"},
+ "213": {"name": "恩施土家族苗族自治州", "parent": "18"},
+ "215": {"name": "长沙市", "parent": "19"},
+ "216": {"name": "株洲市", "parent": "19"},
+ "217": {"name": "湘潭市", "parent": "19"},
+ "218": {"name": "衡阳市", "parent": "19"},
+ "219": {"name": "邵阳市", "parent": "19"},
+ "220": {"name": "岳阳市", "parent": "19"},
+ "221": {"name": "常德市", "parent": "19"},
+ "222": {"name": "张家界市", "parent": "19"},
+ "223": {"name": "益阳市", "parent": "19"},
+ "224": {"name": "郴州市", "parent": "19"},
+ "225": {"name": "永州市", "parent": "19"},
+ "226": {"name": "怀化市", "parent": "19"},
+ "227": {"name": "娄底市", "parent": "19"},
+ "228": {"name": "湘西土家族苗族自治州", "parent": "19"},
+ "229": {"name": "广州市", "parent": "20"},
+ "230": {"name": "韶关市", "parent": "20"},
+ "231": {"name": "深圳市", "parent": "20"},
+ "232": {"name": "珠海市", "parent": "20"},
+ "233": {"name": "汕头市", "parent": "20"},
+ "234": {"name": "佛山市", "parent": "20"},
+ "235": {"name": "江门市", "parent": "20"},
+ "236": {"name": "湛江市", "parent": "20"},
+ "237": {"name": "茂名市", "parent": "20"},
+ "238": {"name": "肇庆市", "parent": "20"},
+ "239": {"name": "惠州市", "parent": "20"},
+ "240": {"name": "梅州市", "parent": "20"},
+ "241": {"name": "汕尾市", "parent": "20"},
+ "242": {"name": "河源市", "parent": "20"},
+ "243": {"name": "阳江市", "parent": "20"},
+ "244": {"name": "清远市", "parent": "20"},
+ "245": {"name": "东莞市", "parent": "20"},
+ "246": {"name": "中山市", "parent": "20"},
+ "247": {"name": "潮州市", "parent": "20"},
+ "248": {"name": "揭阳市", "parent": "20"},
+ "249": {"name": "云浮市", "parent": "20"},
+ "250": {"name": "南宁市", "parent": "21"},
+ "251": {"name": "柳州市", "parent": "21"},
+ "252": {"name": "桂林市", "parent": "21"},
+ "253": {"name": "梧州市", "parent": "21"},
+ "254": {"name": "北海市", "parent": "21"},
+ "255": {"name": "防城港市", "parent": "21"},
+ "256": {"name": "钦州市", "parent": "21"},
+ "257": {"name": "贵港市", "parent": "21"},
+ "258": {"name": "玉林市", "parent": "21"},
+ "259": {"name": "百色市", "parent": "21"},
+ "260": {"name": "贺州市", "parent": "21"},
+ "261": {"name": "河池市", "parent": "21"},
+ "262": {"name": "来宾市", "parent": "21"},
+ "263": {"name": "崇左市", "parent": "21"},
+ "264": {"name": "海口市", "parent": "22"},
+ "265": {"name": "三亚市", "parent": "22"},
+ "267": {"name": "重庆市", "parent": "23"},
+ "268": {"name": "成都市", "parent": "24"},
+ "269": {"name": "自贡市", "parent": "24"},
+ "270": {"name": "攀枝花市", "parent": "24"},
+ "271": {"name": "泸州市", "parent": "24"},
+ "272": {"name": "德阳市", "parent": "24"},
+ "273": {"name": "绵阳市", "parent": "24"},
+ "274": {"name": "广元市", "parent": "24"},
+ "275": {"name": "遂宁市", "parent": "24"},
+ "276": {"name": "内江市", "parent": "24"},
+ "277": {"name": "乐山市", "parent": "24"},
+ "278": {"name": "南充市", "parent": "24"},
+ "279": {"name": "眉山市", "parent": "24"},
+ "280": {"name": "宜宾市", "parent": "24"},
+ "281": {"name": "广安市", "parent": "24"},
+ "282": {"name": "达州市", "parent": "24"},
+ "283": {"name": "雅安市", "parent": "24"},
+ "284": {"name": "巴中市", "parent": "24"},
+ "285": {"name": "资阳市", "parent": "24"},
+ "286": {"name": "阿坝藏族羌族自治州", "parent": "24"},
+ "287": {"name": "甘孜藏族自治州", "parent": "24"},
+ "288": {"name": "凉山彝族自治州", "parent": "24"},
+ "289": {"name": "贵阳市", "parent": "25"},
+ "290": {"name": "六盘水市", "parent": "25"},
+ "291": {"name": "遵义市", "parent": "25"},
+ "292": {"name": "安顺市", "parent": "25"},
+ "293": {"name": "铜仁市", "parent": "25"},
+ "294": {"name": "黔西南布依族苗族自治州", "parent": "25"},
+ "295": {"name": "毕节市", "parent": "25"},
+ "296": {"name": "黔东南苗族侗族自治州", "parent": "25"},
+ "297": {"name": "黔南布依族苗族自治州", "parent": "25"},
+ "298": {"name": "昆明市", "parent": "26"},
+ "299": {"name": "曲靖市", "parent": "26"},
+ "300": {"name": "玉溪市", "parent": "26"},
+ "301": {"name": "保山市", "parent": "26"},
+ "302": {"name": "昭通市", "parent": "26"},
+ "303": {"name": "丽江市", "parent": "26"},
+ "304": {"name": "普洱市", "parent": "26"},
+ "305": {"name": "临沧市", "parent": "26"},
+ "306": {"name": "楚雄彝族自治州", "parent": "26"},
+ "307": {"name": "红河哈尼族彝族自治州", "parent": "26"},
+ "308": {"name": "文山壮族苗族自治州", "parent": "26"},
+ "309": {"name": "西双版纳傣族自治州", "parent": "26"},
+ "310": {"name": "大理白族自治州", "parent": "26"},
+ "311": {"name": "德宏傣族景颇族自治州", "parent": "26"},
+ "312": {"name": "怒江傈僳族自治州", "parent": "26"},
+ "313": {"name": "迪庆藏族自治州", "parent": "26"},
+ "314": {"name": "拉萨市", "parent": "27"},
+ "315": {"name": "昌都市", "parent": "27"},
+ "316": {"name": "山南市", "parent": "27"},
+ "317": {"name": "日喀则市", "parent": "27"},
+ "318": {"name": "那曲市", "parent": "27"},
+ "319": {"name": "阿里地区", "parent": "27"},
+ "320": {"name": "林芝市", "parent": "27"},
+ "321": {"name": "西安市", "parent": "28"},
+ "322": {"name": "铜川市", "parent": "28"},
+ "323": {"name": "宝鸡市", "parent": "28"},
+ "324": {"name": "咸阳市", "parent": "28"},
+ "325": {"name": "渭南市", "parent": "28"},
+ "326": {"name": "延安市", "parent": "28"},
+ "327": {"name": "汉中市", "parent": "28"},
+ "328": {"name": "榆林市", "parent": "28"},
+ "329": {"name": "安康市", "parent": "28"},
+ "330": {"name": "商洛市", "parent": "28"},
+ "331": {"name": "兰州市", "parent": "29"},
+ "332": {"name": "嘉峪关市", "parent": "29"},
+ "333": {"name": "金昌市", "parent": "29"},
+ "334": {"name": "白银市", "parent": "29"},
+ "335": {"name": "天水市", "parent": "29"},
+ "336": {"name": "武威市", "parent": "29"},
+ "337": {"name": "张掖市", "parent": "29"},
+ "338": {"name": "平凉市", "parent": "29"},
+ "339": {"name": "酒泉市", "parent": "29"},
+ "340": {"name": "庆阳市", "parent": "29"},
+ "341": {"name": "定西市", "parent": "29"},
+ "342": {"name": "陇南市", "parent": "29"},
+ "343": {"name": "临夏回族自治州", "parent": "29"},
+ "344": {"name": "甘南藏族自治州", "parent": "29"},
+ "345": {"name": "西宁市", "parent": "30"},
+ "346": {"name": "海东市", "parent": "30"},
+ "347": {"name": "海北藏族自治州", "parent": "30"},
+ "348": {"name": "黄南藏族自治州", "parent": "30"},
+ "349": {"name": "海南藏族自治州", "parent": "30"},
+ "350": {"name": "果洛藏族自治州", "parent": "30"},
+ "351": {"name": "玉树藏族自治州", "parent": "30"},
+ "352": {"name": "海西蒙古族藏族自治州", "parent": "30"},
+ "353": {"name": "银川市", "parent": "31"},
+ "354": {"name": "石嘴山市", "parent": "31"},
+ "355": {"name": "吴忠市", "parent": "31"},
+ "356": {"name": "固原市", "parent": "31"},
+ "357": {"name": "中卫市", "parent": "31"},
+ "358": {"name": "乌鲁木齐市", "parent": "32"},
+ "359": {"name": "克拉玛依市", "parent": "32"},
+ "360": {"name": "吐鲁番市", "parent": "32"},
+ "361": {"name": "哈密市", "parent": "32"},
+ "362": {"name": "昌吉回族自治州", "parent": "32"},
+ "363": {"name": "博尔塔拉蒙古自治州", "parent": "32"},
+ "364": {"name": "巴音郭楞蒙古自治州", "parent": "32"},
+ "365": {"name": "阿克苏地区", "parent": "32"},
+ "366": {"name": "克孜勒苏柯尔克孜自治州", "parent": "32"},
+ "367": {"name": "喀什地区", "parent": "32"},
+ "368": {"name": "和田地区", "parent": "32"},
+ "369": {"name": "伊犁哈萨克自治州", "parent": "32"},
+ "370": {"name": "塔城地区", "parent": "32"},
+ "371": {"name": "阿勒泰地区", "parent": "32"},
+ "372": {"name": "新疆省直辖行政单位", "parent": "32"},
+ "373": {"name": "可克达拉市", "parent": "32"},
+ "374": {"name": "昆玉市", "parent": "32"},
+ "375": {"name": "胡杨河市", "parent": "32"},
+ "376": {"name": "双河市", "parent": "32"},
+ "3560": {"name": "北票市", "parent": "7"},
+ "3615": {"name": "高州市", "parent": "20"},
+ "3651": {"name": "济源市", "parent": "17"},
+ "3662": {"name": "胶南市", "parent": "16"},
+ "3683": {"name": "老河口市", "parent": "18"},
+ "3758": {"name": "沙河市", "parent": "4"},
+ "3822": {"name": "宜城市", "parent": "18"},
+ "3842": {"name": "枣阳市", "parent": "18"},
+ "3850": {"name": "肇东市", "parent": "9"},
+ "3905": {"name": "澳门", "parent": "1"},
+ "3906": {"name": "澳门", "parent": "3905"},
+ "3907": {"name": "香港", "parent": "1"},
+ "3908": {"name": "香港", "parent": "3907"},
+ "3947": {"name": "仙桃市", "parent": "18"},
+ "3954": {"name": "台湾", "parent": "1"},
+ "3955": {"name": "台湾", "parent": "3954"},
+ "3956": {"name": "海外", "parent": "1"},
+ "3957": {"name": "海外", "parent": "3956"},
+ "3958": {"name": "美国", "parent": "3956"},
+ "3959": {"name": "加拿大", "parent": "3956"},
+ "3961": {"name": "日本", "parent": "3956"},
+ "3962": {"name": "韩国", "parent": "3956"},
+ "3963": {"name": "德国", "parent": "3956"},
+ "3964": {"name": "英国", "parent": "3956"},
+ "3965": {"name": "意大利", "parent": "3956"},
+ "3966": {"name": "西班牙", "parent": "3956"},
+ "3967": {"name": "法国", "parent": "3956"},
+ "3968": {"name": "澳大利亚", "parent": "3956"},
+ "3969": {"name": "东城区", "parent": "2"},
+ "3970": {"name": "西城区", "parent": "2"},
+ "3971": {"name": "崇文区", "parent": "2"},
+ "3972": {"name": "宣武区", "parent": "2"},
+ "3973": {"name": "朝阳区", "parent": "2"},
+ "3974": {"name": "海淀区", "parent": "2"},
+ "3975": {"name": "丰台区", "parent": "2"},
+ "3976": {"name": "石景山区", "parent": "2"},
+ "3977": {"name": "门头沟区", "parent": "2"},
+ "3978": {"name": "房山区", "parent": "2"},
+ "3979": {"name": "通州区", "parent": "2"},
+ "3980": {"name": "顺义区", "parent": "2"},
+ "3981": {"name": "昌平区", "parent": "2"},
+ "3982": {"name": "大兴区", "parent": "2"},
+ "3983": {"name": "平谷区", "parent": "2"},
+ "3984": {"name": "怀柔区", "parent": "2"},
+ "3985": {"name": "密云区", "parent": "2"},
+ "3986": {"name": "延庆区", "parent": "2"},
+ "3987": {"name": "黄浦区", "parent": "10"},
+ "3988": {"name": "徐汇区", "parent": "10"},
+ "3989": {"name": "长宁区", "parent": "10"},
+ "3990": {"name": "静安区", "parent": "10"},
+ "3991": {"name": "普陀区", "parent": "10"},
+ "3992": {"name": "闸北区", "parent": "10"},
+ "3993": {"name": "虹口区", "parent": "10"},
+ "3994": {"name": "杨浦区", "parent": "10"},
+ "3995": {"name": "宝山区", "parent": "10"},
+ "3996": {"name": "闵行区", "parent": "10"},
+ "3997": {"name": "嘉定区", "parent": "10"},
+ "3998": {"name": "浦东新区", "parent": "10"},
+ "3999": {"name": "松江区", "parent": "10"},
+ "4000": {"name": "金山区", "parent": "10"},
+ "4001": {"name": "青浦区", "parent": "10"},
+ "4002": {"name": "奉贤区", "parent": "10"},
+ "4003": {"name": "崇明区", "parent": "10"},
+ "4004": {"name": "和平区", "parent": "3"},
+ "4005": {"name": "河东区", "parent": "3"},
+ "4006": {"name": "河西区", "parent": "3"},
+ "4007": {"name": "南开区", "parent": "3"},
+ "4008": {"name": "红桥区", "parent": "3"},
+ "4009": {"name": "河北区", "parent": "3"},
+ "4010": {"name": "滨海新区", "parent": "3"},
+ "4011": {"name": "东丽区", "parent": "3"},
+ "4012": {"name": "西青区", "parent": "3"},
+ "4013": {"name": "北辰区", "parent": "3"},
+ "4014": {"name": "津南区", "parent": "3"},
+ "4015": {"name": "武清区", "parent": "3"},
+ "4016": {"name": "宝坻区", "parent": "3"},
+ "4017": {"name": "静海区", "parent": "3"},
+ "4018": {"name": "宁河区", "parent": "3"},
+ "4019": {"name": "蓟州区", "parent": "3"},
+ "4020": {"name": "渝中区", "parent": "23"},
+ "4021": {"name": "江北区", "parent": "23"},
+ "4022": {"name": "南岸区", "parent": "23"},
+ "4023": {"name": "沙坪坝区", "parent": "23"},
+ "4024": {"name": "九龙坡区", "parent": "23"},
+ "4025": {"name": "大渡口区", "parent": "23"},
+ "4026": {"name": "渝北区", "parent": "23"},
+ "4027": {"name": "巴南区", "parent": "23"},
+ "4028": {"name": "北碚区", "parent": "23"},
+ "4029": {"name": "万州区", "parent": "23"},
+ "4030": {"name": "黔江区", "parent": "23"},
+ "4031": {"name": "永川区", "parent": "23"},
+ "4032": {"name": "涪陵区", "parent": "23"},
+ "4033": {"name": "江津区", "parent": "23"},
+ "4034": {"name": "合川区", "parent": "23"},
+ "4035": {"name": "双桥区", "parent": "23"},
+ "4036": {"name": "万盛区", "parent": "23"},
+ "4037": {"name": "荣昌区", "parent": "23"},
+ "4038": {"name": "大足区", "parent": "23"},
+ "4039": {"name": "璧山区", "parent": "23"},
+ "4040": {"name": "铜梁区", "parent": "23"},
+ "4041": {"name": "潼南区", "parent": "23"},
+ "4042": {"name": "綦江区", "parent": "23"},
+ "4043": {"name": "忠县", "parent": "23"},
+ "4044": {"name": "开州区", "parent": "23"},
+ "4045": {"name": "云阳县", "parent": "23"},
+ "4046": {"name": "梁平区", "parent": "23"},
+ "4047": {"name": "垫江县", "parent": "23"},
+ "4048": {"name": "丰都县", "parent": "23"},
+ "4049": {"name": "奉节县", "parent": "23"},
+ "4050": {"name": "巫山县", "parent": "23"},
+ "4051": {"name": "巫溪县", "parent": "23"},
+ "4052": {"name": "城口县", "parent": "23"},
+ "4053": {"name": "武隆区", "parent": "23"},
+ "4054": {"name": "石柱土家族自治县", "parent": "23"},
+ "4055": {"name": "秀山土家族苗族自治县", "parent": "23"},
+ "4056": {"name": "酉阳土家族苗族自治县", "parent": "23"},
+ "4057": {"name": "彭水苗族土家族自治县", "parent": "23"},
+ "4058": {"name": "潜江市", "parent": "18"},
+ "4059": {"name": "三沙市", "parent": "22"},
+ "4060": {"name": "石河子市", "parent": "32"},
+ "4061": {"name": "阿拉尔市", "parent": "32"},
+ "4062": {"name": "图木舒克市", "parent": "32"},
+ "4063": {"name": "五家渠市", "parent": "32"},
+ "4064": {"name": "北屯市", "parent": "32"},
+ "4065": {"name": "铁门关市", "parent": "32"},
+ "4066": {"name": "儋州市", "parent": "22"},
+ "4067": {"name": "五指山市", "parent": "22"},
+ "4068": {"name": "文昌市", "parent": "22"},
+ "4069": {"name": "琼海市", "parent": "22"},
+ "4070": {"name": "万宁市", "parent": "22"},
+ "4072": {"name": "定安县", "parent": "22"},
+ "4073": {"name": "屯昌县", "parent": "22"},
+ "4074": {"name": "澄迈县", "parent": "22"},
+ "4075": {"name": "临高县", "parent": "22"},
+ "4076": {"name": "琼中黎族苗族自治县", "parent": "22"},
+ "4077": {"name": "保亭黎族苗族自治县", "parent": "22"},
+ "4078": {"name": "白沙黎族自治县", "parent": "22"},
+ "4079": {"name": "昌江黎族自治县", "parent": "22"},
+ "4080": {"name": "乐东黎族自治县", "parent": "22"},
+ "4081": {"name": "陵水黎族自治县", "parent": "22"},
+ "4082": {"name": "马来西亚", "parent": "3956"},
+ "6047": {"name": "长寿区", "parent": "23"},
+ "6857": {"name": "阿富汗", "parent": "3956"},
+ "6858": {"name": "阿尔巴尼亚", "parent": "3956"},
+ "6859": {"name": "阿尔及利亚", "parent": "3956"},
+ "6860": {"name": "美属萨摩亚", "parent": "3956"},
+ "6861": {"name": "安道尔", "parent": "3956"},
+ "6862": {"name": "安哥拉", "parent": "3956"},
+ "6863": {"name": "安圭拉", "parent": "3956"},
+ "6864": {"name": "南极洲", "parent": "3956"},
+ "6865": {"name": "安提瓜和巴布达", "parent": "3956"},
+ "6866": {"name": "阿根廷", "parent": "3956"},
+ "6867": {"name": "亚美尼亚", "parent": "3956"},
+ "6869": {"name": "奥地利", "parent": "3956"},
+ "6870": {"name": "阿塞拜疆", "parent": "3956"},
+ "6871": {"name": "巴哈马", "parent": "3956"},
+ "6872": {"name": "巴林", "parent": "3956"},
+ "6873": {"name": "孟加拉国", "parent": "3956"},
+ "6874": {"name": "巴巴多斯", "parent": "3956"},
+ "6875": {"name": "白俄罗斯", "parent": "3956"},
+ "6876": {"name": "比利时", "parent": "3956"},
+ "6877": {"name": "伯利兹", "parent": "3956"},
+ "6878": {"name": "贝宁", "parent": "3956"},
+ "6879": {"name": "百慕大", "parent": "3956"},
+ "6880": {"name": "不丹", "parent": "3956"},
+ "6881": {"name": "玻利维亚", "parent": "3956"},
+ "6882": {"name": "波黑", "parent": "3956"},
+ "6883": {"name": "博茨瓦纳", "parent": "3956"},
+ "6884": {"name": "布维岛", "parent": "3956"},
+ "6885": {"name": "巴西", "parent": "3956"},
+ "6886": {"name": "英属印度洋领土", "parent": "3956"},
+ "6887": {"name": "文莱", "parent": "3956"},
+ "6888": {"name": "保加利亚", "parent": "3956"},
+ "6889": {"name": "布基纳法索", "parent": "3956"},
+ "6890": {"name": "布隆迪", "parent": "3956"},
+ "6891": {"name": "柬埔寨", "parent": "3956"},
+ "6892": {"name": "喀麦隆", "parent": "3956"},
+ "6893": {"name": "佛得角", "parent": "3956"},
+ "6894": {"name": "开曼群岛", "parent": "3956"},
+ "6895": {"name": "中非", "parent": "3956"},
+ "6896": {"name": "乍得", "parent": "3956"},
+ "6897": {"name": "智利", "parent": "3956"},
+ "6898": {"name": "圣诞岛", "parent": "3956"},
+ "6899": {"name": "科科斯(基林)群岛", "parent": "3956"},
+ "6900": {"name": "哥伦比亚", "parent": "3956"},
+ "6901": {"name": "科摩罗", "parent": "3956"},
+ "6902": {"name": "刚果(布)", "parent": "3956"},
+ "6903": {"name": "刚果(金)", "parent": "3956"},
+ "6904": {"name": "库克群岛", "parent": "3956"},
+ "6905": {"name": "哥斯达黎加", "parent": "3956"},
+ "6906": {"name": "科特迪瓦", "parent": "3956"},
+ "6907": {"name": "克罗地亚", "parent": "3956"},
+ "6908": {"name": "古巴", "parent": "3956"},
+ "6909": {"name": "塞浦路斯", "parent": "3956"},
+ "6910": {"name": "捷克", "parent": "3956"},
+ "6911": {"name": "丹麦", "parent": "3956"},
+ "6912": {"name": "吉布提", "parent": "3956"},
+ "6913": {"name": "多米尼克", "parent": "3956"},
+ "6914": {"name": "多米尼加共和国", "parent": "3956"},
+ "6915": {"name": "东帝汶", "parent": "3956"},
+ "6916": {"name": "厄瓜多尔", "parent": "3956"},
+ "6917": {"name": "埃及", "parent": "3956"},
+ "6918": {"name": "萨尔瓦多", "parent": "3956"},
+ "6919": {"name": "赤道几内亚", "parent": "3956"},
+ "6920": {"name": "厄立特里亚", "parent": "3956"},
+ "6921": {"name": "爱沙尼亚", "parent": "3956"},
+ "6922": {"name": "埃塞俄比亚", "parent": "3956"},
+ "6923": {"name": "福克兰群岛(马尔维纳斯)", "parent": "3956"},
+ "6924": {"name": "法罗群岛", "parent": "3956"},
+ "6925": {"name": "斐济", "parent": "3956"},
+ "6926": {"name": "芬兰", "parent": "3956"},
+ "6927": {"name": "法属圭亚那", "parent": "3956"},
+ "6928": {"name": "法属波利尼西亚", "parent": "3956"},
+ "6929": {"name": "法属南部领土", "parent": "3956"},
+ "6930": {"name": "加蓬", "parent": "3956"},
+ "6931": {"name": "冈比亚", "parent": "3956"},
+ "6932": {"name": "格鲁吉亚", "parent": "3956"},
+ "6933": {"name": "加纳", "parent": "3956"},
+ "6934": {"name": "直布罗陀", "parent": "3956"},
+ "6935": {"name": "希腊", "parent": "3956"},
+ "6936": {"name": "格陵兰", "parent": "3956"},
+ "6937": {"name": "格林纳达", "parent": "3956"},
+ "6938": {"name": "瓜德罗普", "parent": "3956"},
+ "6939": {"name": "关岛", "parent": "3956"},
+ "6940": {"name": "危地马拉", "parent": "3956"},
+ "6941": {"name": "几内亚", "parent": "3956"},
+ "6942": {"name": "几内亚比绍", "parent": "3956"},
+ "6943": {"name": "圭亚那", "parent": "3956"},
+ "6944": {"name": "海地", "parent": "3956"},
+ "6945": {"name": "赫德岛和麦克唐纳岛", "parent": "3956"},
+ "6946": {"name": "洪都拉斯", "parent": "3956"},
+ "6947": {"name": "匈牙利", "parent": "3956"},
+ "6948": {"name": "冰岛", "parent": "3956"},
+ "6949": {"name": "印度", "parent": "3956"},
+ "6950": {"name": "印度尼西亚", "parent": "3956"},
+ "6951": {"name": "伊朗", "parent": "3956"},
+ "6952": {"name": "伊拉克", "parent": "3956"},
+ "6953": {"name": "爱尔兰", "parent": "3956"},
+ "6954": {"name": "以色列", "parent": "3956"},
+ "6955": {"name": "牙买加", "parent": "3956"},
+ "6956": {"name": "约旦", "parent": "3956"},
+ "6957": {"name": "哈萨克斯坦", "parent": "3956"},
+ "6958": {"name": "肯尼亚", "parent": "3956"},
+ "6959": {"name": "基里巴斯", "parent": "3956"},
+ "6960": {"name": "朝鲜", "parent": "3956"},
+ "6961": {"name": "科威特", "parent": "3956"},
+ "6962": {"name": "吉尔吉斯斯坦", "parent": "3956"},
+ "6963": {"name": "老挝", "parent": "3956"},
+ "6964": {"name": "拉脱维亚", "parent": "3956"},
+ "6965": {"name": "黎巴嫩", "parent": "3956"},
+ "6966": {"name": "莱索托", "parent": "3956"},
+ "6967": {"name": "利比里亚", "parent": "3956"},
+ "6968": {"name": "利比亚", "parent": "3956"},
+ "6969": {"name": "列支敦士登", "parent": "3956"},
+ "6970": {"name": "立陶宛", "parent": "3956"},
+ "6971": {"name": "卢森堡", "parent": "3956"},
+ "6972": {"name": "前南马其顿", "parent": "3956"},
+ "6973": {"name": "马达加斯加", "parent": "3956"},
+ "6974": {"name": "马拉维", "parent": "3956"},
+ "6975": {"name": "马尔代夫", "parent": "3956"},
+ "6976": {"name": "马里", "parent": "3956"},
+ "6977": {"name": "马耳他", "parent": "3956"},
+ "6978": {"name": "马绍尔群岛", "parent": "3956"},
+ "6979": {"name": "马提尼克", "parent": "3956"},
+ "6980": {"name": "毛里塔尼亚", "parent": "3956"},
+ "6981": {"name": "毛里求斯", "parent": "3956"},
+ "6982": {"name": "马约特", "parent": "3956"},
+ "6983": {"name": "墨西哥", "parent": "3956"},
+ "6984": {"name": "密克罗尼西亚联邦", "parent": "3956"},
+ "6985": {"name": "摩尔多瓦", "parent": "3956"},
+ "6986": {"name": "摩纳哥", "parent": "3956"},
+ "6987": {"name": "蒙古", "parent": "3956"},
+ "6988": {"name": "蒙特塞拉特", "parent": "3956"},
+ "6989": {"name": "摩洛哥", "parent": "3956"},
+ "6990": {"name": "莫桑比克", "parent": "3956"},
+ "6991": {"name": "缅甸", "parent": "3956"},
+ "6992": {"name": "纳米比亚", "parent": "3956"},
+ "6993": {"name": "瑙鲁", "parent": "3956"},
+ "6994": {"name": "尼泊尔", "parent": "3956"},
+ "6995": {"name": "荷兰", "parent": "3956"},
+ "6996": {"name": "荷属安的列斯", "parent": "3956"},
+ "6997": {"name": "新喀里多尼亚", "parent": "3956"},
+ "6998": {"name": "新西兰", "parent": "3956"},
+ "6999": {"name": "尼加拉瓜", "parent": "3956"},
+ "7000": {"name": "尼日尔", "parent": "3956"},
+ "7001": {"name": "尼日利亚", "parent": "3956"},
+ "7002": {"name": "纽埃", "parent": "3956"},
+ "7003": {"name": "诺福克岛", "parent": "3956"},
+ "7004": {"name": "北马里亚纳", "parent": "3956"},
+ "7005": {"name": "挪威", "parent": "3956"},
+ "7006": {"name": "阿曼", "parent": "3956"},
+ "7007": {"name": "巴基斯坦", "parent": "3956"},
+ "7008": {"name": "帕劳", "parent": "3956"},
+ "7009": {"name": "巴勒斯坦", "parent": "3956"},
+ "7010": {"name": "巴拿马", "parent": "3956"},
+ "7011": {"name": "巴布亚新几内亚", "parent": "3956"},
+ "7012": {"name": "巴拉圭", "parent": "3956"},
+ "7013": {"name": "秘鲁", "parent": "3956"},
+ "7014": {"name": "菲律宾", "parent": "3956"},
+ "7015": {"name": "皮特凯恩群岛", "parent": "3956"},
+ "7016": {"name": "波兰", "parent": "3956"},
+ "7017": {"name": "葡萄牙", "parent": "3956"},
+ "7018": {"name": "波多黎各", "parent": "3956"},
+ "7019": {"name": "卡塔尔", "parent": "3956"},
+ "7020": {"name": "留尼汪", "parent": "3956"},
+ "7021": {"name": "罗马尼亚", "parent": "3956"},
+ "7022": {"name": "俄罗斯联邦", "parent": "3956"},
+ "7023": {"name": "卢旺达", "parent": "3956"},
+ "7024": {"name": "圣赫勒拿", "parent": "3956"},
+ "7025": {"name": "圣基茨和尼维斯", "parent": "3956"},
+ "7026": {"name": "圣卢西亚", "parent": "3956"},
+ "7027": {"name": "圣皮埃尔和密克隆", "parent": "3956"},
+ "7028": {"name": "圣文森特和格林纳丁斯", "parent": "3956"},
+ "7029": {"name": "萨摩亚", "parent": "3956"},
+ "7030": {"name": "圣马力诺", "parent": "3956"},
+ "7031": {"name": "圣多美和普林西比", "parent": "3956"},
+ "7032": {"name": "沙特阿拉伯", "parent": "3956"},
+ "7033": {"name": "塞内加尔", "parent": "3956"},
+ "7034": {"name": "塞舌尔", "parent": "3956"},
+ "7035": {"name": "塞拉利昂", "parent": "3956"},
+ "7036": {"name": "新加坡", "parent": "3956"},
+ "7037": {"name": "斯洛伐克", "parent": "3956"},
+ "7038": {"name": "斯洛文尼亚", "parent": "3956"},
+ "7039": {"name": "所罗门群岛", "parent": "3956"},
+ "7040": {"name": "索马里", "parent": "3956"},
+ "7041": {"name": "南非", "parent": "3956"},
+ "7042": {"name": "南乔治亚岛和南桑德韦奇岛", "parent": "3956"},
+ "7043": {"name": "斯里兰卡", "parent": "3956"},
+ "7044": {"name": "苏丹", "parent": "3956"},
+ "7045": {"name": "苏里南", "parent": "3956"},
+ "7046": {"name": "斯瓦尔巴群岛", "parent": "3956"},
+ "7047": {"name": "斯威士兰", "parent": "3956"},
+ "7048": {"name": "瑞典", "parent": "3956"},
+ "7049": {"name": "瑞士", "parent": "3956"},
+ "7050": {"name": "叙利亚", "parent": "3956"},
+ "7051": {"name": "塔吉克斯坦", "parent": "3956"},
+ "7052": {"name": "坦桑尼亚", "parent": "3956"},
+ "7053": {"name": "泰国", "parent": "3956"},
+ "7054": {"name": "多哥", "parent": "3956"},
+ "7055": {"name": "托克劳", "parent": "3956"},
+ "7056": {"name": "汤加", "parent": "3956"},
+ "7057": {"name": "特立尼达和多巴哥", "parent": "3956"},
+ "7058": {"name": "突尼斯", "parent": "3956"},
+ "7059": {"name": "土耳其", "parent": "3956"},
+ "7060": {"name": "土库曼斯坦", "parent": "3956"},
+ "7061": {"name": "特克斯科斯群岛", "parent": "3956"},
+ "7062": {"name": "图瓦卢", "parent": "3956"},
+ "7063": {"name": "乌干达", "parent": "3956"},
+ "7064": {"name": "乌克兰", "parent": "3956"},
+ "7065": {"name": "阿联酋", "parent": "3956"},
+ "7066": {"name": "美国本土外小岛屿", "parent": "3956"},
+ "7067": {"name": "乌拉圭", "parent": "3956"},
+ "7068": {"name": "乌兹别克斯坦", "parent": "3956"},
+ "7069": {"name": "瓦努阿图", "parent": "3956"},
+ "7070": {"name": "梵蒂冈", "parent": "3956"},
+ "7071": {"name": "委内瑞拉", "parent": "3956"},
+ "7072": {"name": "越南", "parent": "3956"},
+ "7073": {"name": "英属维尔京群岛", "parent": "3956"},
+ "7074": {"name": "美属维尔京群岛", "parent": "3956"},
+ "7075": {"name": "瓦利斯和富图纳", "parent": "3956"},
+ "7076": {"name": "西撒哈拉", "parent": "3956"},
+ "7077": {"name": "也门", "parent": "3956"},
+ "7078": {"name": "南斯拉夫", "parent": "3956"},
+ "7079": {"name": "赞比亚", "parent": "3956"},
+ "7080": {"name": "津巴布韦", "parent": "3956"},
+ "7081": {"name": "塞尔维亚", "parent": "3956"},
+ "7082": {"name": "雄安新区", "parent": "4"},
+ "7084": {"name": "天门市", "parent": "18"},
}
-NM_SET = set([v["name"] for _,v in TBL.items()])
+NM_SET = set([v["name"] for _, v in TBL.items()])
+
def get_names(id):
- if not id or str(id).lower() == "none":return []
+ if not id or str(id).lower() == "none":
+ return []
id = str(id)
- if not re.match("[0-9]+$", id.strip()):return [id]
+ if not re.match("[0-9]+$", id.strip()):
+ return [id]
nms = []
d = TBL.get(id)
- if not d:return[]
+ if not d:
+ return []
nms.append(d["name"])
p = get_names(d["parent"])
- if p: nms.extend(p)
+ if p:
+ nms.extend(p)
return nms
-import re
+
+
def isName(nm):
- if nm in NM_SET:return True
- if nm + "市" in NM_SET:return True
- if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET:return True
+ if nm in NM_SET:
+ return True
+ if nm + "市" in NM_SET:
+ return True
+ if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET:
+ return True
return False
diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py
index 31662cde94f9ba161e97ad3ba3703b526bf6b706..c6b0bf7fdbccc49aaa694c6134be20afe20e0c5e 100644
--- a/deepdoc/parser/resume/entities/schools.py
+++ b/deepdoc/parser/resume/entities/schools.py
@@ -16,8 +16,11 @@ import json
import re
import copy
import pandas as pd
+
current_file_path = os.path.dirname(os.path.abspath(__file__))
-TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("")
+TBL = pd.read_csv(
+ os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
+).fillna("")
TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r"))
GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
@@ -26,14 +29,15 @@ GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
def loadRank(fnm):
global TBL
TBL["rank"] = 1000000
- with open(fnm, "r", encoding='utf-8') as f:
+ with open(fnm, "r", encoding="utf-8") as f:
while True:
- l = f.readline()
- if not l:break
- l = l.strip("\n").split(",")
+ line = f.readline()
+ if not line:
+ break
+ line = line.strip("\n").split(",")
try:
- nm,rk = l[0].strip(),int(l[1])
- #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
+ nm, rk = line[0].strip(), int(line[1])
+ # assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
except Exception:
pass
@@ -44,27 +48,35 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv"))
def split(txt):
tks = []
- for t in re.sub(r"[ \t]+", " ",txt).split():
- if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \
- re.match(r"[a-zA-Z]", t) and tks:
+ for t in re.sub(r"[ \t]+", " ", txt).split():
+ if (
+ tks
+ and re.match(r".*[a-zA-Z]$", tks[-1])
+ and re.match(r"[a-zA-Z]", t)
+ and tks
+ ):
tks[-1] = tks[-1] + " " + t
- else:tks.append(t)
+ else:
+ tks.append(t)
return tks
def select(nm):
global TBL
- if not nm:return
- if isinstance(nm, list):nm = str(nm[0])
+ if not nm:
+ return
+ if isinstance(nm, list):
+ nm = str(nm[0])
nm = split(nm)[0]
nm = str(nm).lower().strip()
nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
nm = re.sub(r"大学.*学院", "大学", nm)
tbl = copy.deepcopy(TBL)
- tbl["hit_alias"] = tbl["alias"].map(lambda x:nm in set(x.split("+")))
- res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | (tbl.hit_alias == True))]
- if res.empty:return
+ tbl["hit_alias"] = tbl["alias"].map(lambda x: nm in set(x.split("+")))
+ res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | tbl.hit_alias)]
+ if res.empty:
+ return
return json.loads(res.to_json(orient="records"))[0]
@@ -74,4 +86,3 @@ def is_good(nm):
nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm)
return nm in GOOD_SCH
-
diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py
index 7d4297774231fcaf620b7b2074b8574faa374969..9e4376735c3b1ea8bd2d9f2cad12cee01ef53366 100644
--- a/deepdoc/parser/resume/step_two.py
+++ b/deepdoc/parser/resume/step_two.py
@@ -25,7 +25,8 @@ from xpinyin import Pinyin
from contextlib import contextmanager
-class TimeoutException(Exception): pass
+class TimeoutException(Exception):
+ pass
@contextmanager
@@ -50,8 +51,10 @@ def rmHtmlTag(line):
def highest_degree(dg):
- if not dg: return ""
- if type(dg) == type(""): dg = [dg]
+ if not dg:
+ return ""
+ if isinstance(dg, str):
+ dg = [dg]
m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0]
@@ -68,10 +71,12 @@ def forEdu(cv):
for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
e = {}
if n.get("end_time"):
- if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"]
+ if n["end_time"] > edu_end_dt:
+ edu_end_dt = n["end_time"]
try:
dt = n["end_time"]
- if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
+ if re.match(r"[0-9]{9,}", dt):
+ dt = turnTm2Dt(dt)
y, m, d = getYMD(dt)
ed_dt.append(str(y))
e["end_dt_kwd"] = str(y)
@@ -80,7 +85,8 @@ def forEdu(cv):
if n.get("start_time"):
try:
dt = n["start_time"]
- if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
+ if re.match(r"[0-9]{9,}", dt):
+ dt = turnTm2Dt(dt)
y, m, d = getYMD(dt)
st_dt.append(str(y))
e["start_dt_kwd"] = str(y)
@@ -89,13 +95,20 @@ def forEdu(cv):
r = schools.select(n.get("school_name", ""))
if r:
- if str(r.get("type", "")) == "1": fea.append("211")
- if str(r.get("type", "")) == "2": fea.append("211")
- if str(r.get("is_abroad", "")) == "1": fea.append("留学")
- if str(r.get("is_double_first", "")) == "1": fea.append("双一流")
- if str(r.get("is_985", "")) == "1": fea.append("985")
- if str(r.get("is_world_known", "")) == "1": fea.append("海外知名")
- if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"]
+ if str(r.get("type", "")) == "1":
+ fea.append("211")
+ if str(r.get("type", "")) == "2":
+ fea.append("211")
+ if str(r.get("is_abroad", "")) == "1":
+ fea.append("留学")
+ if str(r.get("is_double_first", "")) == "1":
+ fea.append("双一流")
+ if str(r.get("is_985", "")) == "1":
+ fea.append("985")
+ if str(r.get("is_world_known", "")) == "1":
+ fea.append("海外知名")
+ if r.get("rank") and cv["school_rank_int"] > r["rank"]:
+ cv["school_rank_int"] = r["rank"]
if n.get("school_name") and isinstance(n["school_name"], str):
sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
@@ -106,22 +119,25 @@ def forEdu(cv):
maj.append(n["discipline_name"])
e["major_kwd"] = n["discipline_name"]
- if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1"
+ if not n.get("degree") and "985" in fea and not first_fea:
+ n["degree"] = "1"
if n.get("degree"):
d = degrees.get_name(n["degree"])
- if d: e["degree_kwd"] = d
- if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)",
- n.get(
- "school_name",
- ""))): d = "专升本"
- if d: deg.append(d)
+ if d:
+ e["degree_kwd"] = d
+ if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", n.get("school_name",""))):
+ d = "专升本"
+ if d:
+ deg.append(d)
# for first degree
if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
fdeg = [d]
- if n.get("school_name"): fsch = [n["school_name"]]
- if n.get("discipline_name"): fmaj = [n["discipline_name"]]
+ if n.get("school_name"):
+ fsch = [n["school_name"]]
+ if n.get("discipline_name"):
+ fmaj = [n["discipline_name"]]
first_fea = copy.deepcopy(fea)
edu_nst.append(e)
@@ -140,16 +156,26 @@ def forEdu(cv):
else:
cv["sch_rank_kwd"].append("一般学校")
- if edu_nst: cv["edu_nst"] = edu_nst
- if fea: cv["edu_fea_kwd"] = list(set(fea))
- if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea))
- if maj: cv["major_kwd"] = maj
- if fsch: cv["first_school_name_kwd"] = fsch
- if fdeg: cv["first_degree_kwd"] = fdeg
- if fmaj: cv["first_major_kwd"] = fmaj
- if st_dt: cv["edu_start_kwd"] = st_dt
- if ed_dt: cv["edu_end_kwd"] = ed_dt
- if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt])
+ if edu_nst:
+ cv["edu_nst"] = edu_nst
+ if fea:
+ cv["edu_fea_kwd"] = list(set(fea))
+ if first_fea:
+ cv["edu_first_fea_kwd"] = list(set(first_fea))
+ if maj:
+ cv["major_kwd"] = maj
+ if fsch:
+ cv["first_school_name_kwd"] = fsch
+ if fdeg:
+ cv["first_degree_kwd"] = fdeg
+ if fmaj:
+ cv["first_major_kwd"] = fmaj
+ if st_dt:
+ cv["edu_start_kwd"] = st_dt
+ if ed_dt:
+ cv["edu_end_kwd"] = ed_dt
+ if ed_dt:
+ cv["edu_end_int"] = max([int(t) for t in ed_dt])
if deg:
if "本科" in deg and "专科" in deg:
deg.append("专升本")
@@ -158,8 +184,10 @@ def forEdu(cv):
cv["highest_degree_kwd"] = highest_degree(deg)
if edu_end_dt:
try:
- if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt)
- if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
+ if re.match(r"[0-9]{9,}", edu_end_dt):
+ edu_end_dt = turnTm2Dt(edu_end_dt)
+ if edu_end_dt.strip("\n") == "至今":
+ edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
y, m, d = getYMD(edu_end_dt)
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
except Exception as e:
@@ -171,7 +199,8 @@ def forEdu(cv):
or not cv.get("degree_kwd"):
for c in sch:
if schools.is_good(c):
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
+ if "tag_kwd" not in cv:
+ cv["tag_kwd"] = []
cv["tag_kwd"].append("好学校")
cv["tag_kwd"].append("好学历")
break
@@ -180,28 +209,39 @@ def forEdu(cv):
any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
- if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历")
-
- if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
- if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
- if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
- if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
+ if "tag_kwd" not in cv:
+ cv["tag_kwd"] = []
+ if "好学历" not in cv["tag_kwd"]:
+ cv["tag_kwd"].append("好学历")
+
+ if cv.get("major_kwd"):
+ cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
+ if cv.get("school_name_kwd"):
+ cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
+ if cv.get("first_school_name_kwd"):
+ cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
+ if cv.get("first_major_kwd"):
+ cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
return cv
def forProj(cv):
- if not cv.get("project_obj"): return cv
+ if not cv.get("project_obj"):
+ return cv
pro_nms, desc = [], []
for i, n in enumerate(
- sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "",
+ sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if isinstance(x, dict) else "",
reverse=True)):
- if n.get("name"): pro_nms.append(n["name"])
- if n.get("describe"): desc.append(str(n["describe"]))
- if n.get("responsibilities"): desc.append(str(n["responsibilities"]))
- if n.get("achivement"): desc.append(str(n["achivement"]))
+ if n.get("name"):
+ pro_nms.append(n["name"])
+ if n.get("describe"):
+ desc.append(str(n["describe"]))
+ if n.get("responsibilities"):
+ desc.append(str(n["responsibilities"]))
+ if n.get("achivement"):
+ desc.append(str(n["achivement"]))
if pro_nms:
# cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
@@ -233,15 +273,16 @@ def forWork(cv):
work_st_tm = ""
corp_tags = []
for i, n in enumerate(
- sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "",
+ sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if isinstance(x, dict) else "",
reverse=True)):
- if type(n) == type(""):
+ if isinstance(n, str):
try:
n = json_loads(n)
except Exception:
continue
- if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"]
+ if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm):
+ work_st_tm = n["start_time"]
for c in flds:
if not n.get(c) or str(n[c]) == '0':
fea[c].append("")
@@ -262,14 +303,18 @@ def forWork(cv):
fea[c].append(rmHtmlTag(str(n[c]).lower()))
y, m, d = getYMD(n.get("start_time"))
- if not y or not m: continue
+ if not y or not m:
+ continue
st = "%s-%02d-%02d" % (y, int(m), int(d))
latest_job_tm = st
y, m, d = getYMD(n.get("end_time"))
- if (not y or not m) and i > 0: continue
- if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", "")))
- if not y or not m: continue
+ if (not y or not m) and i > 0:
+ continue
+ if not y or not m or int(y) > 2022:
+ y, m, d = getYMD(str(n.get("updated_at", "")))
+ if not y or not m:
+ continue
ed = "%s-%02d-%02d" % (y, int(m), int(d))
try:
@@ -279,22 +324,28 @@ def forWork(cv):
if n.get("scale"):
r = re.search(r"^([0-9]+)", str(n["scale"]))
- if r: scales.append(int(r.group(1)))
+ if r:
+ scales.append(int(r.group(1)))
if goodcorp:
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
+ if "tag_kwd" not in cv:
+ cv["tag_kwd"] = []
cv["tag_kwd"].append("好公司")
if goodcorp_:
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
+ if "tag_kwd" not in cv:
+ cv["tag_kwd"] = []
cv["tag_kwd"].append("好公司(曾)")
if corp_tags:
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
+ if "tag_kwd" not in cv:
+ cv["tag_kwd"] = []
cv["tag_kwd"].extend(corp_tags)
cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
- if latest_job_tm: cv["latest_job_dt"] = latest_job_tm
- if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"]
+ if latest_job_tm:
+ cv["latest_job_dt"] = latest_job_tm
+ if fea["corporation_id"]:
+ cv["corporation_id"] = fea["corporation_id"]
if fea["position_name"]:
cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
@@ -317,18 +368,23 @@ def forWork(cv):
cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
- if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
+ if fea["subordinates_count"]:
+ fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
re.match(r"[^0-9]+$", str(i))]
- if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
+ if fea["subordinates_count"]:
+ cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
- if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])]
- if not cv.get("corporation_id"): cv["corporation_id"] = []
+ if isinstance(cv.get("corporation_id"), int):
+ cv["corporation_id"] = [str(cv["corporation_id"])]
+ if not cv.get("corporation_id"):
+ cv["corporation_id"] = []
for i in cv.get("corporation_id", []):
cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
if work_st_tm:
try:
- if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm)
+ if re.match(r"[0-9]{9,}", work_st_tm):
+ work_st_tm = turnTm2Dt(work_st_tm)
y, m, d = getYMD(work_st_tm)
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
except Exception as e:
@@ -339,28 +395,37 @@ def forWork(cv):
cv["dua_flt"] = np.mean(duas)
cv["cur_dua_int"] = duas[0]
cv["job_num_int"] = len(duas)
- if scales: cv["scale_flt"] = np.max(scales)
+ if scales:
+ cv["scale_flt"] = np.max(scales)
return cv
def turnTm2Dt(b):
- if not b: return
+ if not b:
+ return
b = str(b).strip()
- if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
+ if re.match(r"[0-9]{10,}", b):
+ b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
return b
def getYMD(b):
y, m, d = "", "", "01"
- if not b: return (y, m, d)
+ if not b:
+ return (y, m, d)
b = turnTm2Dt(b)
- if re.match(r"[0-9]{4}", b): y = int(b[:4])
+ if re.match(r"[0-9]{4}", b):
+ y = int(b[:4])
r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
- if r: m = r.group(1)
+ if r:
+ m = r.group(1)
r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
- if r: d = r.group(1)
- if not d or int(d) == 0 or int(d) > 31: d = "1"
- if not m or int(m) > 12 or int(m) < 1: m = "1"
+ if r:
+ d = r.group(1)
+ if not d or int(d) == 0 or int(d) > 31:
+ d = "1"
+ if not m or int(m) > 12 or int(m) < 1:
+ m = "1"
return (y, m, d)
@@ -369,7 +434,8 @@ def birth(cv):
cv["integerity_flt"] *= 0.9
return cv
y, m, d = getYMD(cv["birth"])
- if not m or not y: return cv
+ if not m or not y:
+ return cv
b = "%s-%02d-%02d" % (y, int(m), int(d))
cv["birth_dt"] = b
cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
@@ -380,7 +446,8 @@ def birth(cv):
def parse(cv):
for k in cv.keys():
- if cv[k] == '\\N': cv[k] = ''
+ if cv[k] == '\\N':
+ cv[k] = ''
# cv = cv.asDict()
tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
"expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
@@ -402,9 +469,12 @@ def parse(cv):
rmkeys = []
for k in cv.keys():
- if cv[k] is None: rmkeys.append(k)
- if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k)
- for k in rmkeys: del cv[k]
+ if cv[k] is None:
+ rmkeys.append(k)
+ if (isinstance(cv[k], list) or isinstance(cv[k], str)) and len(cv[k]) == 0:
+ rmkeys.append(k)
+ for k in rmkeys:
+ del cv[k]
integerity = 0.
flds_num = 0.
@@ -414,7 +484,8 @@ def parse(cv):
flds_num += len(flds)
for f in flds:
v = str(cv.get(f, ""))
- if len(v) > 0 and v != '0' and v != '[]': integerity += 1
+ if len(v) > 0 and v != '0' and v != '[]':
+ integerity += 1
hasValues(tks_fld)
hasValues(small_tks_fld)
@@ -433,7 +504,8 @@ def parse(cv):
(r"[ ()\(\)人/·0-9-]+", ""),
(r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
- if len(cv["corporation_type"]) < 2: del cv["corporation_type"]
+ if len(cv["corporation_type"]) < 2:
+ del cv["corporation_type"]
if cv.get("political_status"):
for p, r in [
@@ -441,9 +513,11 @@ def parse(cv):
(r".*(无党派|公民).*", "群众"),
(r".*团员.*", "团员")]:
cv["political_status"] = re.sub(p, r, cv["political_status"])
- if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"]
+ if not re.search(r"[党团群]", cv["political_status"]):
+ del cv["political_status"]
- if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
+ if cv.get("phone"):
+ cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
keys = list(cv.keys())
for k in keys:
@@ -454,9 +528,11 @@ def parse(cv):
cv[k] = [a for _, a in cv[k].items()]
nms = []
for n in cv[k]:
- if type(n) != type({}) or "name" not in n or not n.get("name"): continue
+ if not isinstance(n, dict) or "name" not in n or not n.get("name"):
+ continue
n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
- if not n["name"]: continue
+ if not n["name"]:
+ continue
nms.append(n["name"])
if nms:
t = k[:-4]
@@ -469,15 +545,18 @@ def parse(cv):
# tokenize fields
if k in tks_fld:
cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
- if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
+ if k in small_tks_fld:
+ cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
# keyword fields
- if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower()
+ if k in kwd_fld:
+ cv[f"{k}_kwd"] = [n.lower()
for n in re.split(r"[\t,,;;. ]",
re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
) if n]
- if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k]
+ if k in num_fld and cv.get(k):
+ cv[f"{k}_int"] = cv[k]
cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
# for name field
@@ -501,10 +580,12 @@ def parse(cv):
cv["name_py_pref0_tks"] = ""
cv["name_py_pref_tks"] = ""
for py in PY.get_pinyins(nm[:20], ''):
- for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i]
+ for i in range(2, len(py) + 1):
+ cv["name_py_pref_tks"] += " " + py[:i]
for py in PY.get_pinyins(nm[:20], ' '):
py = py.split()
- for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i])
+ for i in range(1, len(py) + 1):
+ cv["name_py_pref0_tks"] += " " + "".join(py[:i])
cv["name_kwd"] = name
cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
@@ -526,22 +607,30 @@ def parse(cv):
cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
else:
y, m, d = getYMD(str(cv.get("updated_at", "")))
- if not y: y = "2012"
- if not m: m = "01"
- if not d: d = "01"
+ if not y:
+ y = "2012"
+ if not m:
+ m = "01"
+ if not d:
+ d = "01"
cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
# long text tokenize
- if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
+ if cv.get("responsibilities"):
+ cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
# for yes or no field
fea = []
for f, y, n in is_fld:
- if f not in cv: continue
- if cv[f] == '是': fea.append(y)
- if cv[f] == '否': fea.append(n)
+ if f not in cv:
+ continue
+ if cv[f] == '是':
+ fea.append(y)
+ if cv[f] == '否':
+ fea.append(n)
- if fea: cv["tag_kwd"] = fea
+ if fea:
+ cv["tag_kwd"] = fea
cv = forEdu(cv)
cv = forProj(cv)
@@ -550,9 +639,11 @@ def parse(cv):
cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
for i in range(len(cv["corp_proj_sch_deg_kwd"])):
- for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j
+ for j in cv.get("sch_rank_kwd", []):
+ cv["corp_proj_sch_deg_kwd"][i] += "+" + j
for i in range(len(cv["corp_proj_sch_deg_kwd"])):
- if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
+ if cv.get("highest_degree_kwd"):
+ cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
try:
if not cv.get("work_exp_flt") and cv.get("work_start_time"):
@@ -565,17 +656,21 @@ def parse(cv):
cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
except Exception as e:
logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
- if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
+ if "work_exp_flt" not in cv and cv.get("work_experience", 0):
+ cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
keys = list(cv.keys())
for k in keys:
- if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k]
+ if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k):
+ del cv[k]
for k in cv.keys():
- if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue
+ if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list):
+ continue
cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
for k in keys:
- if cv[k] <= 0: del cv[k]
+ if cv[k] <= 0:
+ del cv[k]
cv["tob_resume_id"] = str(cv["tob_resume_id"])
cv["id"] = cv["tob_resume_id"]
@@ -592,5 +687,6 @@ def dealWithInt64(d):
if isinstance(d, list):
d = [dealWithInt64(t) for t in d]
- if isinstance(d, np.integer): d = int(d)
+ if isinstance(d, np.integer):
+ d = int(d)
return d
diff --git a/deepdoc/parser/txt_parser.py b/deepdoc/parser/txt_parser.py
index 620368987ace8b0bf7fec456cebb4136675d3284..93b52eea32d4434f5e8fb949af4f88a1b7fed636 100644
--- a/deepdoc/parser/txt_parser.py
+++ b/deepdoc/parser/txt_parser.py
@@ -51,6 +51,7 @@ class RAGFlowTxtParser:
dels = [d for d in dels if d]
dels = "|".join(dels)
secs = re.split(r"(%s)" % dels, txt)
- for sec in secs: add_chunk(sec)
+ for sec in secs:
+ add_chunk(sec)
return [[c, ""] for c in cks]
diff --git a/deepdoc/vision/__init__.py b/deepdoc/vision/__init__.py
index 9f16fe3d8c758e30044c1e9b5356e75afce3a10c..131827b57ad9fdb7039679653d026b0fefaaef25 100644
--- a/deepdoc/vision/__init__.py
+++ b/deepdoc/vision/__init__.py
@@ -18,7 +18,6 @@ from .recognizer import Recognizer
from .layout_recognizer import LayoutRecognizer
from .table_structure_recognizer import TableStructureRecognizer
-
def init_in_out(args):
from PIL import Image
import os
@@ -47,7 +46,7 @@ def init_in_out(args):
try:
images.append(Image.open(fnm))
outputs.append(os.path.split(fnm)[-1])
- except Exception as e:
+ except Exception:
traceback.print_exc()
if os.path.isdir(args.inputs):
@@ -56,6 +55,16 @@ def init_in_out(args):
else:
images_and_outputs(args.inputs)
- for i in range(len(outputs)): outputs[i] = os.path.join(args.output_dir, outputs[i])
+ for i in range(len(outputs)):
+ outputs[i] = os.path.join(args.output_dir, outputs[i])
+
+ return images, outputs
+
- return images, outputs
\ No newline at end of file
+__all__ = [
+ "OCR",
+ "Recognizer",
+ "LayoutRecognizer",
+ "TableStructureRecognizer",
+ "init_in_out",
+]
diff --git a/deepdoc/vision/layout_recognizer.py b/deepdoc/vision/layout_recognizer.py
index 88006f9af60a88ff7aba0abded64c1e701bc248e..e8a6fdc5cfd83bdb3165e35f2232f42709b491d5 100644
--- a/deepdoc/vision/layout_recognizer.py
+++ b/deepdoc/vision/layout_recognizer.py
@@ -42,7 +42,7 @@ class LayoutRecognizer(Recognizer):
get_project_base_directory(),
"rag/res/deepdoc")
super().__init__(self.labels, domain, model_dir)
- except Exception as e:
+ except Exception:
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
local_dir_use_symlinks=False)
@@ -77,7 +77,7 @@ class LayoutRecognizer(Recognizer):
"page_number": pn,
} for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
lts = self.sort_Y_firstly(lts, np.mean(
- [l["bottom"] - l["top"] for l in lts]) / 2)
+ [lt["bottom"] - lt["top"] for lt in lts]) / 2)
lts = self.layouts_cleanup(bxs, lts)
page_layout.append(lts)
diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py
index 317b671fdd4b6e70a1f6f318dad16cddfd97672a..ee8ca6ab1eafeace731f587370505be19d5b22dd 100644
--- a/deepdoc/vision/ocr.py
+++ b/deepdoc/vision/ocr.py
@@ -19,7 +19,9 @@ from huggingface_hub import snapshot_download
from api.utils.file_utils import get_project_base_directory
from .operators import *
+import math
import numpy as np
+import cv2
import onnxruntime as ort
from .postprocess import build_post_process
@@ -484,7 +486,7 @@ class OCR(object):
"rag/res/deepdoc")
self.text_detector = TextDetector(model_dir)
self.text_recognizer = TextRecognizer(model_dir)
- except Exception as e:
+ except Exception:
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
local_dir_use_symlinks=False)
diff --git a/deepdoc/vision/operators.py b/deepdoc/vision/operators.py
index 9037fc455238cb5b1d4c90c56b37b6cc202efad7..80ae299486569dc239e773cb292414042eb45c96 100644
--- a/deepdoc/vision/operators.py
+++ b/deepdoc/vision/operators.py
@@ -232,7 +232,7 @@ class LinearResize(object):
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
- im_channel = im.shape[2]
+ _im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
@@ -255,7 +255,7 @@ class LinearResize(object):
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
- im_c = im.shape[2]
+ _im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
@@ -581,7 +581,7 @@ class SRResize(object):
return data
images_HR = data["image_hr"]
- label_strs = data["label"]
+ _label_strs = data["label"]
transform = ResizeNormalize((imgW, imgH))
images_HR = transform(images_HR)
data["img_hr"] = images_HR
diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py
index 9ab08e4ffa699c7403ce174b12bf81727a09a96f..6fb111de3bb843231aefae7edaa0bbeae9b44364 100644
--- a/deepdoc/vision/postprocess.py
+++ b/deepdoc/vision/postprocess.py
@@ -121,7 +121,7 @@ class DBPostProcess(object):
outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
if len(outs) == 3:
- img, contours, _ = outs[0], outs[1], outs[2]
+ _img, contours, _ = outs[0], outs[1], outs[2]
elif len(outs) == 2:
contours, _ = outs[0], outs[1]
diff --git a/deepdoc/vision/recognizer.py b/deepdoc/vision/recognizer.py
index 2181550e548fe1275f3a3570a86365216889a9c0..de5dc7ed06e2ab57de918cbab82fec3a55ff72ee 100644
--- a/deepdoc/vision/recognizer.py
+++ b/deepdoc/vision/recognizer.py
@@ -13,15 +13,18 @@
import logging
import os
+import math
+import numpy as np
+import cv2
from copy import deepcopy
+
import onnxruntime as ort
from huggingface_hub import snapshot_download
from api.utils.file_utils import get_project_base_directory
from .operators import *
-
class Recognizer(object):
def __init__(self, label_list, task_name, model_dir=None):
"""
@@ -277,7 +280,8 @@ class Recognizer(object):
return
min_dis, min_i = 1000000, None
for i,b in enumerate(boxes):
- if box.get("layoutno", "0") != b.get("layoutno", "0"): continue
+ if box.get("layoutno", "0") != b.get("layoutno", "0"):
+ continue
dis = min(abs(box["x0"] - b["x0"]), abs(box["x1"] - b["x1"]), abs(box["x0"]+box["x1"] - b["x1"] - b["x0"])/2)
if dis < min_dis:
min_i = i
@@ -402,7 +406,8 @@ class Recognizer(object):
scores = np.max(boxes[:, 4:], axis=1)
boxes = boxes[scores > thr, :]
scores = scores[scores > thr]
- if len(boxes) == 0: return []
+ if len(boxes) == 0:
+ return []
# Get the class with the highest confidence
class_ids = np.argmax(boxes[:, 4:], axis=1)
@@ -432,7 +437,8 @@ class Recognizer(object):
for i in range(len(image_list)):
if not isinstance(image_list[i], np.ndarray):
imgs.append(np.array(image_list[i]))
- else: imgs.append(image_list[i])
+ else:
+ imgs.append(image_list[i])
batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size)
for i in range(batch_loop_cnt):
diff --git a/graphrag/community_reports_extractor.py b/graphrag/community_reports_extractor.py
index 25f7b170b78f870ad7efdd778b5a823062032d71..756a7811eb985e8e20ca78d8de49ae2c9f249cc6 100644
--- a/graphrag/community_reports_extractor.py
+++ b/graphrag/community_reports_extractor.py
@@ -88,7 +88,8 @@ class CommunityReportsExtractor:
("findings", list),
("rating", float),
("rating_explanation", str),
- ]): continue
+ ]):
+ continue
response["weight"] = weight
response["entities"] = ents
except Exception as e:
@@ -100,7 +101,8 @@ class CommunityReportsExtractor:
res_str.append(self._get_text_output(response))
res_dict.append(response)
over += 1
- if callback: callback(msg=f"Communities: {over}/{total}, elapsed: {timer() - st}s, used tokens: {token_count}")
+ if callback:
+ callback(msg=f"Communities: {over}/{total}, elapsed: {timer() - st}s, used tokens: {token_count}")
return CommunityReportsResult(
structured_output=res_dict,
diff --git a/graphrag/entity_embedding.py b/graphrag/entity_embedding.py
index 892d7db39896cc3364825fc436a55c391dcf5194..af7bc2a7ba3ce0d02b5e0f1b80112147312a519b 100644
--- a/graphrag/entity_embedding.py
+++ b/graphrag/entity_embedding.py
@@ -8,6 +8,7 @@ Reference:
from typing import Any
import numpy as np
import networkx as nx
+from dataclasses import dataclass
from graphrag.leiden import stable_largest_connected_component
diff --git a/graphrag/graph_extractor.py b/graphrag/graph_extractor.py
index 0a83454027c3a3514dda6d2adae55282ffe9091d..290390ac9c44b1a8bcc16f3dab55482d27ce55c0 100644
--- a/graphrag/graph_extractor.py
+++ b/graphrag/graph_extractor.py
@@ -129,9 +129,11 @@ class GraphExtractor:
source_doc_map[doc_index] = text
all_records[doc_index] = result
total_token_count += token_count
- if callback: callback(msg=f"{doc_index+1}/{total}, elapsed: {timer() - st}s, used tokens: {total_token_count}")
+ if callback:
+ callback(msg=f"{doc_index+1}/{total}, elapsed: {timer() - st}s, used tokens: {total_token_count}")
except Exception as e:
- if callback: callback(msg="Knowledge graph extraction error:{}".format(str(e)))
+ if callback:
+ callback(msg="Knowledge graph extraction error:{}".format(str(e)))
logging.exception("error extracting graph")
self._on_error(
e,
@@ -164,7 +166,8 @@ class GraphExtractor:
text = perform_variable_replacements(self._extraction_prompt, variables=variables)
gen_conf = {"temperature": 0.3}
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
- if response.find("**ERROR**") >= 0: raise Exception(response)
+ if response.find("**ERROR**") >= 0:
+ raise Exception(response)
token_count = num_tokens_from_string(text + response)
results = response or ""
@@ -175,7 +178,8 @@ class GraphExtractor:
text = perform_variable_replacements(CONTINUE_PROMPT, history=history, variables=variables)
history.append({"role": "user", "content": text})
response = self._llm.chat("", history, gen_conf)
- if response.find("**ERROR**") >=0: raise Exception(response)
+ if response.find("**ERROR**") >=0:
+ raise Exception(response)
results += response or ""
# if this is the final glean, don't bother updating the continuation flag
diff --git a/graphrag/index.py b/graphrag/index.py
index 89e332cd02948114854114bbeb1524421dc6f887..09c62a271937fefb7a33ed0f40997bbb9110db0d 100644
--- a/graphrag/index.py
+++ b/graphrag/index.py
@@ -134,7 +134,8 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: list[str], callback, en
callback(0.75, "Extracting mind graph.")
mindmap = MindMapExtractor(llm_bdl)
mg = mindmap(_chunks).output
- if not len(mg.keys()): return chunks
+ if not len(mg.keys()):
+ return chunks
logging.debug(json.dumps(mg, ensure_ascii=False, indent=2))
chunks.append(
diff --git a/graphrag/leiden.py b/graphrag/leiden.py
index 4c87f085f1764756e5147954136dd34f55a817d3..315e6ff933745689bcb8c898061260921299b7bb 100644
--- a/graphrag/leiden.py
+++ b/graphrag/leiden.py
@@ -78,7 +78,8 @@ def _compute_leiden_communities(
) -> dict[int, dict[str, int]]:
"""Return Leiden root communities."""
results: dict[int, dict[str, int]] = {}
- if is_empty(graph): return results
+ if is_empty(graph):
+ return results
if use_lcc:
graph = stable_largest_connected_component(graph)
@@ -100,7 +101,8 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
logging.debug(
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
)
- if not graph.nodes(): return {}
+ if not graph.nodes():
+ return {}
node_id_to_community_map = _compute_leiden_communities(
graph=graph,
@@ -125,9 +127,11 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
result[community_id]["nodes"].append(node_id)
result[community_id]["weight"] += graph.nodes[node_id].get("rank", 0) * graph.nodes[node_id].get("weight", 1)
weights = [comm["weight"] for _, comm in result.items()]
- if not weights:continue
+ if not weights:
+ continue
max_weight = max(weights)
- for _, comm in result.items(): comm["weight"] /= max_weight
+ for _, comm in result.items():
+ comm["weight"] /= max_weight
return results_by_level
diff --git a/intergrations/chatgpt-on-wechat/plugins/__init__.py b/intergrations/chatgpt-on-wechat/plugins/__init__.py
index 4b79b693129324968bfb9e99d86f00e3b5e27e49..c1c3a156841ca77c2f2a32eceff0ab78ae69ff22 100644
--- a/intergrations/chatgpt-on-wechat/plugins/__init__.py
+++ b/intergrations/chatgpt-on-wechat/plugins/__init__.py
@@ -1 +1,5 @@
-from .ragflow_chat import *
+from .ragflow_chat import RAGFlowChat
+
+__all__ = [
+ "RAGFlowChat"
+]
diff --git a/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py b/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py
index 17878c540bf07c0da924a7e91955d68c1c687dca..5d5615eaa17536a35f687efb06e4f946d7eedbcc 100644
--- a/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py
+++ b/intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py
@@ -2,7 +2,6 @@ import logging
import requests
from bridge.context import ContextType # Import Context, ContextType
from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
-from bridge import *
from plugins import Plugin, register # Import Plugin and register
from plugins.event import Event, EventContext, EventAction # Import event-related classes
diff --git a/rag/app/book.py b/rag/app/book.py
index 65de875a1e0ccd56b90d6eccd70d0611662518a6..763364778bc099b5f557d4a0acedb072acbdaa51 100644
--- a/rag/app/book.py
+++ b/rag/app/book.py
@@ -94,7 +94,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
sections = txt.split("\n")
- sections = [(l, "") for l in sections if l]
+ sections = [(line, "") for line in sections if line]
remove_contents_table(sections, eng=is_english(
random_choices([t for t, _ in sections], k=200)))
callback(0.8, "Finish parsing.")
@@ -102,7 +102,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections = HtmlParser()(filename, binary)
- sections = [(l, "") for l in sections if l]
+ sections = [(line, "") for line in sections if line]
remove_contents_table(sections, eng=is_english(
random_choices([t for t, _ in sections], k=200)))
callback(0.8, "Finish parsing.")
@@ -112,7 +112,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
binary = BytesIO(binary)
doc_parsed = parser.from_buffer(binary)
sections = doc_parsed['content'].split('\n')
- sections = [(l, "") for l in sections if l]
+ sections = [(line, "") for line in sections if line]
remove_contents_table(sections, eng=is_english(
random_choices([t for t, _ in sections], k=200)))
callback(0.8, "Finish parsing.")
diff --git a/rag/app/email.py b/rag/app/email.py
index 5226c78eead8fa86a3bfcbd715ffd9c1ec822ec1..b14ee6d43e04561208dcc78e558dc428e9defd5c 100644
--- a/rag/app/email.py
+++ b/rag/app/email.py
@@ -75,7 +75,7 @@ def chunk(
_add_content(msg, msg.get_content_type())
sections = TxtParser.parser_txt("\n".join(text_txt)) + [
- (l, "") for l in HtmlParser.parser_txt("\n".join(html_txt)) if l
+ (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line
]
st = timer()
diff --git a/rag/app/knowledge_graph.py b/rag/app/knowledge_graph.py
index 74fbbec1018e7d8f9248a1fcd53a7382671294e3..b252d56153c46e7044f5aa330304335f2b937b56 100644
--- a/rag/app/knowledge_graph.py
+++ b/rag/app/knowledge_graph.py
@@ -18,7 +18,8 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
)
- for c in chunks: c["docnm_kwd"] = filename
+ for c in chunks:
+ c["docnm_kwd"] = filename
doc = {
"docnm_kwd": filename,
diff --git a/rag/app/laws.py b/rag/app/laws.py
index 3ee41c7a7a9299923e07e82a83b6742f03bb1e06..1e3dedfdc1b8a640efef186a0d27aef983ad7554 100644
--- a/rag/app/laws.py
+++ b/rag/app/laws.py
@@ -48,7 +48,7 @@ class Docx(DocxParser):
continue
if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
pn += 1
- return [l for l in lines if l]
+ return [line for line in lines if line]
def __call__(self, filename, binary=None, from_page=0, to_page=100000):
self.doc = Document(
@@ -60,7 +60,8 @@ class Docx(DocxParser):
if pn > to_page:
break
question_level, p_text = docx_question_level(p, bull)
- if not p_text.strip("\n"):continue
+ if not p_text.strip("\n"):
+ continue
lines.append((question_level, p_text))
for run in p.runs:
@@ -78,19 +79,21 @@ class Docx(DocxParser):
if lines[e][0] <= lines[s][0]:
break
e += 1
- if e - s == 1 and visit[s]: continue
+ if e - s == 1 and visit[s]:
+ continue
sec = []
next_level = lines[s][0] + 1
while not sec and next_level < 22:
for i in range(s+1, e):
- if lines[i][0] != next_level: continue
+ if lines[i][0] != next_level:
+ continue
sec.append(lines[i][1])
visit[i] = True
next_level += 1
sec.insert(0, lines[s][1])
sections.append("\n".join(sec))
- return [l for l in sections if l]
+ return [s for s in sections if s]
def __str__(self) -> str:
return f'''
@@ -168,13 +171,13 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
sections = txt.split("\n")
- sections = [l for l in sections if l]
+ sections = [s for s in sections if s]
callback(0.8, "Finish parsing.")
elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections = HtmlParser()(filename, binary)
- sections = [l for l in sections if l]
+ sections = [s for s in sections if s]
callback(0.8, "Finish parsing.")
elif re.search(r"\.doc$", filename, re.IGNORECASE):
@@ -182,7 +185,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
binary = BytesIO(binary)
doc_parsed = parser.from_buffer(binary)
sections = doc_parsed['content'].split('\n')
- sections = [l for l in sections if l]
+ sections = [s for s in sections if s]
callback(0.8, "Finish parsing.")
else:
diff --git a/rag/app/manual.py b/rag/app/manual.py
index 1ea5e9633d40470f7fb4bcbc9ed8353fa0f6c4cb..49acac1581178b9b1394c07be693bcdf47d4cc7a 100644
--- a/rag/app/manual.py
+++ b/rag/app/manual.py
@@ -190,7 +190,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
sections, tbls = pdf_parser(filename if not binary else binary,
from_page=from_page, to_page=to_page, callback=callback)
if sections and len(sections[0]) < 3:
- sections = [(t, l, [[0] * 5]) for t, l in sections]
+ sections = [(t, lvl, [[0] * 5]) for t, lvl in sections]
# set pivot using the most frequent type of title,
# then merge between 2 pivot
if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1:
@@ -211,7 +211,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
else:
bull = bullets_category([txt for txt, _, _ in sections])
most_level, levels = title_frequency(
- bull, [(txt, l) for txt, l, poss in sections])
+ bull, [(txt, lvl) for txt, lvl, _ in sections])
assert len(sections) == len(levels)
sec_ids = []
@@ -225,7 +225,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
sections = [(txt, sec_ids[i], poss)
for i, (txt, _, poss) in enumerate(sections)]
for (img, rows), poss in tbls:
- if not rows: continue
+ if not rows:
+ continue
sections.append((rows if isinstance(rows, str) else rows[0], -1,
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
diff --git a/rag/app/one.py b/rag/app/one.py
index c7fe0314c2f75a0c4e64918bf87eefbe9927a7c7..bd691f46c3e66f5a4d352fae23c999e91208907d 100644
--- a/rag/app/one.py
+++ b/rag/app/one.py
@@ -54,7 +54,8 @@ class Pdf(PdfParser):
sections = [(b["text"], self.get_position(b, zoomin))
for i, b in enumerate(self.boxes)]
for (img, rows), poss in tbls:
- if not rows:continue
+ if not rows:
+ continue
sections.append((rows if isinstance(rows, str) else rows[0],
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
@@ -109,7 +110,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
binary = BytesIO(binary)
doc_parsed = parser.from_buffer(binary)
sections = doc_parsed['content'].split('\n')
- sections = [l for l in sections if l]
+ sections = [s for s in sections if s]
callback(0.8, "Finish parsing.")
else:
diff --git a/rag/app/qa.py b/rag/app/qa.py
index 0fd7a932b6325c19e6a4edad51f73afb0b340a53..d77daebd6430d1b63542755efe0d867f24ddca65 100644
--- a/rag/app/qa.py
+++ b/rag/app/qa.py
@@ -171,7 +171,7 @@ class Pdf(PdfParser):
tbl_bottom = tbls[tbl_index][1][0][4]
tbl_tag = "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \
.format(tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom)
- tbl_text = ''.join(tbls[tbl_index][0][1])
+ _tbl_text = ''.join(tbls[tbl_index][0][1])
return tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag,
@@ -325,9 +325,11 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
txt = get_text(filename, binary)
lines = txt.split("\n")
comma, tab = 0, 0
- for l in lines:
- if len(l.split(",")) == 2: comma += 1
- if len(l.split("\t")) == 2: tab += 1
+ for line in lines:
+ if len(line.split(",")) == 2:
+ comma += 1
+ if len(line.split("\t")) == 2:
+ tab += 1
delimiter = "\t" if tab >= comma else ","
fails = []
@@ -336,18 +338,21 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
while i < len(lines):
arr = lines[i].split(delimiter)
if len(arr) != 2:
- if question: answer += "\n" + lines[i]
+ if question:
+ answer += "\n" + lines[i]
else:
fails.append(str(i+1))
elif len(arr) == 2:
- if question and answer: res.append(beAdoc(deepcopy(doc), question, answer, eng))
+ if question and answer:
+ res.append(beAdoc(deepcopy(doc), question, answer, eng))
question, answer = arr
i += 1
if len(res) % 999 == 0:
callback(len(res) * 0.6 / len(lines), ("Extract Q&A: {}".format(len(res)) + (
f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))
- if question: res.append(beAdoc(deepcopy(doc), question, answer, eng))
+ if question:
+ res.append(beAdoc(deepcopy(doc), question, answer, eng))
callback(0.6, ("Extract Q&A: {}".format(len(res)) + (
f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))
@@ -367,19 +372,18 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
lines = txt.split("\n")
- last_question, last_answer = "", ""
+ _last_question, last_answer = "", ""
question_stack, level_stack = [], []
code_block = False
- level_index = [-1] * 7
- for index, l in enumerate(lines):
- if l.strip().startswith('```'):
+ for index, line in enumerate(lines):
+ if line.strip().startswith('```'):
code_block = not code_block
question_level, question = 0, ''
if not code_block:
- question_level, question = mdQuestionLevel(l)
+ question_level, question = mdQuestionLevel(line)
if not question_level or question_level > 6: # not a question
- last_answer = f'{last_answer}\n{l}'
+ last_answer = f'{last_answer}\n{line}'
else: # is a question
if last_answer.strip():
sum_question = '\n'.join(question_stack)
diff --git a/rag/app/table.py b/rag/app/table.py
index b5148983ce73bcabeb48d43d66e7951165b12722..d7ba35a6bbd57fbd8189abf1016a67d286a4a944 100644
--- a/rag/app/table.py
+++ b/rag/app/table.py
@@ -41,14 +41,16 @@ class Excel(ExcelParser):
for sheetname in wb.sheetnames:
ws = wb[sheetname]
rows = list(ws.rows)
- if not rows:continue
+ if not rows:
+ continue
headers = [cell.value for cell in rows[0]]
missed = set([i for i, h in enumerate(headers) if h is None])
headers = [
cell.value for i,
cell in enumerate(
rows[0]) if i not in missed]
- if not headers:continue
+ if not headers:
+ continue
data = []
for i, r in enumerate(rows[1:]):
rn += 1
@@ -88,7 +90,6 @@ def trans_bool(s):
def column_data_type(arr):
arr = list(arr)
- uni = len(set([a for a in arr if a is not None]))
counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0}
trans = {t: f for f, t in
[(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]}
@@ -157,7 +158,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
continue
if i >= to_page:
break
- row = [l for l in line.split(kwargs.get("delimiter", "\t"))]
+ row = [field for field in line.split(kwargs.get("delimiter", "\t"))]
if len(row) != len(headers):
fails.append(str(i))
continue
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 647b2a909e17600a8286f9c40f3f02e91994e8cd..bc35fe63dbfb2f6f828c62bb2a6e1488bd44cf5c 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -13,12 +13,124 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from .embedding_model import *
-from .chat_model import *
-from .cv_model import *
-from .rerank_model import *
-from .sequence2txt_model import *
-from .tts_model import *
+from .embedding_model import (
+ OllamaEmbed,
+ LocalAIEmbed,
+ OpenAIEmbed,
+ AzureEmbed,
+ XinferenceEmbed,
+ QWenEmbed,
+ ZhipuEmbed,
+ FastEmbed,
+ YoudaoEmbed,
+ BaiChuanEmbed,
+ JinaEmbed,
+ DefaultEmbedding,
+ MistralEmbed,
+ BedrockEmbed,
+ GeminiEmbed,
+ NvidiaEmbed,
+ LmStudioEmbed,
+ OpenAI_APIEmbed,
+ CoHereEmbed,
+ TogetherAIEmbed,
+ PerfXCloudEmbed,
+ UpstageEmbed,
+ SILICONFLOWEmbed,
+ ReplicateEmbed,
+ BaiduYiyanEmbed,
+ VoyageEmbed,
+ HuggingFaceEmbed,
+ VolcEngineEmbed,
+)
+from .chat_model import (
+ GptTurbo,
+ AzureChat,
+ ZhipuChat,
+ QWenChat,
+ OllamaChat,
+ LocalAIChat,
+ XinferenceChat,
+ MoonshotChat,
+ DeepSeekChat,
+ VolcEngineChat,
+ BaiChuanChat,
+ MiniMaxChat,
+ MistralChat,
+ GeminiChat,
+ BedrockChat,
+ GroqChat,
+ OpenRouterChat,
+ StepFunChat,
+ NvidiaChat,
+ LmStudioChat,
+ OpenAI_APIChat,
+ CoHereChat,
+ LeptonAIChat,
+ TogetherAIChat,
+ PerfXCloudChat,
+ UpstageChat,
+ NovitaAIChat,
+ SILICONFLOWChat,
+ YiChat,
+ ReplicateChat,
+ HunyuanChat,
+ SparkChat,
+ BaiduYiyanChat,
+ AnthropicChat,
+ GoogleChat,
+ HuggingFaceChat,
+)
+
+from .cv_model import (
+ GptV4,
+ AzureGptV4,
+ OllamaCV,
+ XinferenceCV,
+ QWenCV,
+ Zhipu4V,
+ LocalCV,
+ GeminiCV,
+ OpenRouterCV,
+ LocalAICV,
+ NvidiaCV,
+ LmStudioCV,
+ StepFunCV,
+ OpenAI_APICV,
+ TogetherAICV,
+ YiCV,
+ HunyuanCV,
+)
+from .rerank_model import (
+ LocalAIRerank,
+ DefaultRerank,
+ JinaRerank,
+ YoudaoRerank,
+ XInferenceRerank,
+ NvidiaRerank,
+ LmStudioRerank,
+ OpenAI_APIRerank,
+ CoHereRerank,
+ TogetherAIRerank,
+ SILICONFLOWRerank,
+ BaiduYiyanRerank,
+ VoyageRerank,
+ QWenRerank,
+)
+from .sequence2txt_model import (
+ GPTSeq2txt,
+ QWenSeq2txt,
+ AzureSeq2txt,
+ XinferenceSeq2txt,
+ TencentCloudSeq2txt,
+)
+from .tts_model import (
+ FishAudioTTS,
+ QwenTTS,
+ OpenAITTS,
+ SparkTTS,
+ XinferenceTTS,
+)
EmbeddingModel = {
"Ollama": OllamaEmbed,
@@ -48,7 +160,7 @@ EmbeddingModel = {
"BaiduYiyan": BaiduYiyanEmbed,
"Voyage AI": VoyageEmbed,
"HuggingFace": HuggingFaceEmbed,
- "VolcEngine":VolcEngineEmbed,
+ "VolcEngine": VolcEngineEmbed,
}
CvModel = {
@@ -68,7 +180,7 @@ CvModel = {
"OpenAI-API-Compatible": OpenAI_APICV,
"TogetherAI": TogetherAICV,
"01.AI": YiCV,
- "Tencent Hunyuan": HunyuanCV
+ "Tencent Hunyuan": HunyuanCV,
}
ChatModel = {
@@ -111,7 +223,7 @@ ChatModel = {
}
RerankModel = {
- "LocalAI":LocalAIRerank,
+ "LocalAI": LocalAIRerank,
"BAAI": DefaultRerank,
"Jina": JinaRerank,
"Youdao": YoudaoRerank,
@@ -132,7 +244,7 @@ Seq2txtModel = {
"Tongyi-Qianwen": QWenSeq2txt,
"Azure-OpenAI": AzureSeq2txt,
"Xinference": XinferenceSeq2txt,
- "Tencent Cloud": TencentCloudSeq2txt
+ "Tencent Cloud": TencentCloudSeq2txt,
}
TTSModel = {
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 26ce3e1489f617dbfcee997de393e3b3c925b3d4..cf038cb433c92e3e03f873d5cbb8dfaa1702e577 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -69,7 +69,8 @@ class Base(ABC):
stream=True,
**gen_conf)
for resp in response:
- if not resp.choices: continue
+ if not resp.choices:
+ continue
if not resp.choices[0].delta.content:
resp.choices[0].delta.content = ""
ans += resp.choices[0].delta.content
@@ -81,7 +82,8 @@ class Base(ABC):
)
elif isinstance(resp.usage, dict):
total_tokens = resp.usage.get("total_tokens", total_tokens)
- else: total_tokens = resp.usage.total_tokens
+ else:
+ total_tokens = resp.usage.total_tokens
if resp.choices[0].finish_reason == "length":
if is_chinese(ans):
@@ -98,13 +100,15 @@ class Base(ABC):
class GptTurbo(Base):
def __init__(self, key, model_name="gpt-3.5-turbo", base_url="https://api.openai.com/v1"):
- if not base_url: base_url = "https://api.openai.com/v1"
+ if not base_url:
+ base_url = "https://api.openai.com/v1"
super().__init__(key, model_name, base_url)
class MoonshotChat(Base):
def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1"):
- if not base_url: base_url = "https://api.moonshot.cn/v1"
+ if not base_url:
+ base_url = "https://api.moonshot.cn/v1"
super().__init__(key, model_name, base_url)
@@ -128,7 +132,8 @@ class HuggingFaceChat(Base):
class DeepSeekChat(Base):
def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1"):
- if not base_url: base_url = "https://api.deepseek.com/v1"
+ if not base_url:
+ base_url = "https://api.deepseek.com/v1"
super().__init__(key, model_name, base_url)
@@ -202,7 +207,8 @@ class BaiChuanChat(Base):
stream=True,
**self._format_params(gen_conf))
for resp in response:
- if not resp.choices: continue
+ if not resp.choices:
+ continue
if not resp.choices[0].delta.content:
resp.choices[0].delta.content = ""
ans += resp.choices[0].delta.content
@@ -313,8 +319,10 @@ class ZhipuChat(Base):
if system:
history.insert(0, {"role": "system", "content": system})
try:
- if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]
+ if "presence_penalty" in gen_conf:
+ del gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ del gen_conf["frequency_penalty"]
response = self.client.chat.completions.create(
model=self.model_name,
messages=history,
@@ -333,8 +341,10 @@ class ZhipuChat(Base):
def chat_streamly(self, system, history, gen_conf):
if system:
history.insert(0, {"role": "system", "content": system})
- if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]
+ if "presence_penalty" in gen_conf:
+ del gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ del gen_conf["frequency_penalty"]
ans = ""
tk_count = 0
try:
@@ -345,7 +355,8 @@ class ZhipuChat(Base):
**gen_conf
)
for resp in response:
- if not resp.choices[0].delta.content: continue
+ if not resp.choices[0].delta.content:
+ continue
delta = resp.choices[0].delta.content
ans += delta
if resp.choices[0].finish_reason == "length":
@@ -354,7 +365,8 @@ class ZhipuChat(Base):
else:
ans += LENGTH_NOTIFICATION_EN
tk_count = resp.usage.total_tokens
- if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
+ if resp.choices[0].finish_reason == "stop":
+ tk_count = resp.usage.total_tokens
yield ans
except Exception as e:
yield ans + "\n**ERROR**: " + str(e)
@@ -372,11 +384,16 @@ class OllamaChat(Base):
history.insert(0, {"role": "system", "content": system})
try:
options = {}
- if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]
- if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]
- if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"]
- if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]
+ if "temperature" in gen_conf:
+ options["temperature"] = gen_conf["temperature"]
+ if "max_tokens" in gen_conf:
+ options["num_predict"] = gen_conf["max_tokens"]
+ if "top_p" in gen_conf:
+ options["top_p"] = gen_conf["top_p"]
+ if "presence_penalty" in gen_conf:
+ options["presence_penalty"] = gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ options["frequency_penalty"] = gen_conf["frequency_penalty"]
response = self.client.chat(
model=self.model_name,
messages=history,
@@ -392,11 +409,16 @@ class OllamaChat(Base):
if system:
history.insert(0, {"role": "system", "content": system})
options = {}
- if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]
- if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]
- if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"]
- if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]
+ if "temperature" in gen_conf:
+ options["temperature"] = gen_conf["temperature"]
+ if "max_tokens" in gen_conf:
+ options["num_predict"] = gen_conf["max_tokens"]
+ if "top_p" in gen_conf:
+ options["top_p"] = gen_conf["top_p"]
+ if "presence_penalty" in gen_conf:
+ options["presence_penalty"] = gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ options["frequency_penalty"] = gen_conf["frequency_penalty"]
ans = ""
try:
response = self.client.chat(
@@ -636,7 +658,8 @@ class MistralChat(Base):
messages=history,
**gen_conf)
for resp in response:
- if not resp.choices or not resp.choices[0].delta.content: continue
+ if not resp.choices or not resp.choices[0].delta.content:
+ continue
ans += resp.choices[0].delta.content
total_tokens += 1
if resp.choices[0].finish_reason == "length":
@@ -1196,7 +1219,8 @@ class SparkChat(Base):
assert model_name in model2version or model_name in version2model, f"The given model name is not supported yet. Support: {list(model2version.keys())}"
if model_name in model2version:
model_version = model2version[model_name]
- else: model_version = model_name
+ else:
+ model_version = model_name
super().__init__(key, model_version, base_url)
@@ -1281,8 +1305,10 @@ class AnthropicChat(Base):
self.system = system
if "max_tokens" not in gen_conf:
gen_conf["max_tokens"] = 4096
- if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]
+ if "presence_penalty" in gen_conf:
+ del gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ del gen_conf["frequency_penalty"]
ans = ""
try:
@@ -1312,8 +1338,10 @@ class AnthropicChat(Base):
self.system = system
if "max_tokens" not in gen_conf:
gen_conf["max_tokens"] = 4096
- if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"]
+ if "presence_penalty" in gen_conf:
+ del gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ del gen_conf["frequency_penalty"]
ans = ""
total_tokens = 0
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 70e9f24ea5068dea4c8dc5acf42453768bfa7685..48a55b674724d314c48b031a5d715ad949484f1f 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -25,6 +25,7 @@ import base64
from io import BytesIO
import json
import requests
+from transformers import GenerationConfig
from rag.nlp import is_english
from api.utils import get_uuid
@@ -77,14 +78,16 @@ class Base(ABC):
stream=True
)
for resp in response:
- if not resp.choices[0].delta.content: continue
+ if not resp.choices[0].delta.content:
+ continue
delta = resp.choices[0].delta.content
ans += delta
if resp.choices[0].finish_reason == "length":
ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
[ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
tk_count = resp.usage.total_tokens
- if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
+ if resp.choices[0].finish_reason == "stop":
+ tk_count = resp.usage.total_tokens
yield ans
except Exception as e:
yield ans + "\n**ERROR**: " + str(e)
@@ -99,7 +102,7 @@ class Base(ABC):
buffered = BytesIO()
try:
image.save(buffered, format="JPEG")
- except Exception as e:
+ except Exception:
image.save(buffered, format="PNG")
return base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -139,7 +142,8 @@ class Base(ABC):
class GptV4(Base):
def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1"):
- if not base_url: base_url="https://api.openai.com/v1"
+ if not base_url:
+ base_url="https://api.openai.com/v1"
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name
self.lang = lang
@@ -149,7 +153,8 @@ class GptV4(Base):
prompt = self.prompt(b64)
for i in range(len(prompt)):
for c in prompt[i]["content"]:
- if "text" in c: c["type"] = "text"
+ if "text" in c:
+ c["type"] = "text"
res = self.client.chat.completions.create(
model=self.model_name,
@@ -171,7 +176,8 @@ class AzureGptV4(Base):
prompt = self.prompt(b64)
for i in range(len(prompt)):
for c in prompt[i]["content"]:
- if "text" in c: c["type"] = "text"
+ if "text" in c:
+ c["type"] = "text"
res = self.client.chat.completions.create(
model=self.model_name,
@@ -344,14 +350,16 @@ class Zhipu4V(Base):
stream=True
)
for resp in response:
- if not resp.choices[0].delta.content: continue
+ if not resp.choices[0].delta.content:
+ continue
delta = resp.choices[0].delta.content
ans += delta
if resp.choices[0].finish_reason == "length":
ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
[ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
tk_count = resp.usage.total_tokens
- if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
+ if resp.choices[0].finish_reason == "stop":
+ tk_count = resp.usage.total_tokens
yield ans
except Exception as e:
yield ans + "\n**ERROR**: " + str(e)
@@ -389,11 +397,16 @@ class OllamaCV(Base):
if his["role"] == "user":
his["images"] = [image]
options = {}
- if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]
- if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]
- if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"]
- if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]
+ if "temperature" in gen_conf:
+ options["temperature"] = gen_conf["temperature"]
+ if "max_tokens" in gen_conf:
+ options["num_predict"] = gen_conf["max_tokens"]
+ if "top_p" in gen_conf:
+ options["top_k"] = gen_conf["top_p"]
+ if "presence_penalty" in gen_conf:
+ options["presence_penalty"] = gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ options["frequency_penalty"] = gen_conf["frequency_penalty"]
response = self.client.chat(
model=self.model_name,
messages=history,
@@ -414,11 +427,16 @@ class OllamaCV(Base):
if his["role"] == "user":
his["images"] = [image]
options = {}
- if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"]
- if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"]
- if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"]
- if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"]
- if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"]
+ if "temperature" in gen_conf:
+ options["temperature"] = gen_conf["temperature"]
+ if "max_tokens" in gen_conf:
+ options["num_predict"] = gen_conf["max_tokens"]
+ if "top_p" in gen_conf:
+ options["top_k"] = gen_conf["top_p"]
+ if "presence_penalty" in gen_conf:
+ options["presence_penalty"] = gen_conf["presence_penalty"]
+ if "frequency_penalty" in gen_conf:
+ options["frequency_penalty"] = gen_conf["frequency_penalty"]
ans = ""
try:
response = self.client.chat(
@@ -469,7 +487,7 @@ class XinferenceCV(Base):
class GeminiCV(Base):
def __init__(self, key, model_name="gemini-1.0-pro-vision-latest", lang="Chinese", **kwargs):
- from google.generativeai import client, GenerativeModel, GenerationConfig
+ from google.generativeai import client, GenerativeModel
client.configure(api_key=key)
_client = client.get_default_generative_client()
self.model_name = model_name
@@ -503,7 +521,7 @@ class GeminiCV(Base):
if his["role"] == "user":
his["parts"] = [his["content"]]
his.pop("content")
- history[-1]["parts"].append(f"data:image/jpeg;base64," + image)
+ history[-1]["parts"].append("data:image/jpeg;base64," + image)
response = self.model.generate_content(history, generation_config=GenerationConfig(
max_output_tokens=gen_conf.get("max_tokens", 1000), temperature=gen_conf.get("temperature", 0.3),
@@ -519,7 +537,6 @@ class GeminiCV(Base):
history[-1]["content"] = system + history[-1]["content"] + "user query: " + history[-1]["content"]
ans = ""
- tk_count = 0
try:
for his in history:
if his["role"] == "assistant":
@@ -529,14 +546,15 @@ class GeminiCV(Base):
if his["role"] == "user":
his["parts"] = [his["content"]]
his.pop("content")
- history[-1]["parts"].append(f"data:image/jpeg;base64," + image)
+ history[-1]["parts"].append("data:image/jpeg;base64," + image)
response = self.model.generate_content(history, generation_config=GenerationConfig(
max_output_tokens=gen_conf.get("max_tokens", 1000), temperature=gen_conf.get("temperature", 0.3),
top_p=gen_conf.get("top_p", 0.7)), stream=True)
for resp in response:
- if not resp.text: continue
+ if not resp.text:
+ continue
ans += resp.text
yield ans
except Exception as e:
@@ -632,7 +650,8 @@ class NvidiaCV(Base):
class StepFunCV(GptV4):
def __init__(self, key, model_name="step-1v-8k", lang="Chinese", base_url="https://api.stepfun.com/v1"):
- if not base_url: base_url="https://api.stepfun.com/v1"
+ if not base_url:
+ base_url="https://api.stepfun.com/v1"
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name
self.lang = lang
diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py
index 950ea10ec35bcde8afc5eea370d6e290885c2cdf..73c8f37dff9be62600843aa6c70159d14f04e7eb 100644
--- a/rag/llm/sequence2txt_model.py
+++ b/rag/llm/sequence2txt_model.py
@@ -15,12 +15,9 @@
#
import requests
from openai.lib.azure import AzureOpenAI
-from zhipuai import ZhipuAI
import io
from abc import ABC
-from ollama import Client
from openai import OpenAI
-import os
import json
from rag.utils import num_tokens_from_string
import base64
@@ -49,7 +46,8 @@ class Base(ABC):
class GPTSeq2txt(Base):
def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1"):
- if not base_url: base_url = "https://api.openai.com/v1"
+ if not base_url:
+ base_url = "https://api.openai.com/v1"
self.client = OpenAI(api_key=key, base_url=base_url)
self.model_name = model_name
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index 814a5dfc37f1301e63fc98bc0e5ff8792c39c6d6..3ce0e44874532895da19f02ed03aff0a061640b2 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -16,7 +16,6 @@
import _thread as thread
import base64
-import datetime
import hashlib
import hmac
import json
@@ -175,7 +174,8 @@ class QwenTTS(Base):
class OpenAITTS(Base):
def __init__(self, key, model_name="tts-1", base_url="https://api.openai.com/v1"):
- if not base_url: base_url = "https://api.openai.com/v1"
+ if not base_url:
+ base_url = "https://api.openai.com/v1"
self.api_key = key
self.model_name = model_name
self.base_url = base_url
diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py
index ddca9b580a1f51fca71fc27e649e83fa1d1fb903..52687f276eac9e901e630bac89fcc01681d54c1e 100644
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -222,7 +222,8 @@ def bullets_category(sections):
def is_english(texts):
eng = 0
- if not texts: return False
+ if not texts:
+ return False
for t in texts:
if re.match(r"[ `a-zA-Z.,':;/\"?<>!\(\)-]", t.strip()):
eng += 1
@@ -250,7 +251,8 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
res = []
# wrap up as es documents
for ck in chunks:
- if len(ck.strip()) == 0:continue
+ if len(ck.strip()) == 0:
+ continue
logging.debug("-- {}".format(ck))
d = copy.deepcopy(doc)
if pdf_parser:
@@ -269,7 +271,8 @@ def tokenize_chunks_docx(chunks, doc, eng, images):
res = []
# wrap up as es documents
for ck, image in zip(chunks, images):
- if len(ck.strip()) == 0:continue
+ if len(ck.strip()) == 0:
+ continue
logging.debug("-- {}".format(ck))
d = copy.deepcopy(doc)
d["image"] = image
@@ -288,8 +291,10 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
d = copy.deepcopy(doc)
tokenize(d, rows, eng)
d["content_with_weight"] = rows
- if img: d["image"] = img
- if poss: add_positions(d, poss)
+ if img:
+ d["image"] = img
+ if poss:
+ add_positions(d, poss)
res.append(d)
continue
de = "; " if eng else "; "
@@ -387,9 +392,9 @@ def title_frequency(bull, sections):
if re.search(r"(title|head)", layout) and not not_title(txt.split("@")[0]):
levels[i] = bullets_size
most_level = bullets_size+1
- for l, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1):
- if l <= bullets_size:
- most_level = l
+ for level, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1):
+ if level <= bullets_size:
+ most_level = level
break
return most_level, levels
@@ -504,7 +509,8 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
def add_chunk(t, pos):
nonlocal cks, tk_nums, delimiter
tnum = num_tokens_from_string(t)
- if not pos: pos = ""
+ if not pos:
+ pos = ""
if tnum < 8:
pos = ""
# Ensure that the length of the merged chunk does not exceed chunk_token_num
diff --git a/rag/nlp/query.py b/rag/nlp/query.py
index 6c018d2ca526c893e802224eab964c47c520d5aa..11e3f502aed60ffc99c6105926461621850dbb4f 100644
--- a/rag/nlp/query.py
+++ b/rag/nlp/query.py
@@ -121,7 +121,8 @@ class FulltextQueryer:
keywords.append(tt)
twts = self.tw.weights([tt])
syns = self.syn.lookup(tt)
- if syns and len(keywords) < 32: keywords.extend(syns)
+ if syns and len(keywords) < 32:
+ keywords.extend(syns)
logging.debug(json.dumps(twts, ensure_ascii=False))
tms = []
for tk, w in sorted(twts, key=lambda x: x[1] * -1):
@@ -147,7 +148,8 @@ class FulltextQueryer:
tk_syns = self.syn.lookup(tk)
tk_syns = [FulltextQueryer.subSpecialChar(s) for s in tk_syns]
- if len(keywords) < 32: keywords.extend([s for s in tk_syns if s])
+ if len(keywords) < 32:
+ keywords.extend([s for s in tk_syns if s])
tk_syns = [rag_tokenizer.fine_grained_tokenize(s) for s in tk_syns if s]
tk_syns = [f"\"{s}\"" if s.find(" ")>0 else s for s in tk_syns]
diff --git a/rag/nlp/rag_tokenizer.py b/rag/nlp/rag_tokenizer.py
index 0815daaeeef1bc6f3533d7b581c7c41781b2141b..953d940ee1c747490885e1a700f301f93addc407 100644
--- a/rag/nlp/rag_tokenizer.py
+++ b/rag/nlp/rag_tokenizer.py
@@ -104,7 +104,6 @@ class RagTokenizer:
return HanziConv.toSimplified(line)
def dfs_(self, chars, s, preTks, tkslist):
- MAX_L = 10
res = s
# if s > MAX_L or s>= len(chars):
if s >= len(chars):
@@ -184,12 +183,6 @@ class RagTokenizer:
return sorted(res, key=lambda x: x[1], reverse=True)
def merge_(self, tks):
- patts = [
- (r"[ ]+", " "),
- (r"([0-9\+\.,%\*=-]) ([0-9\+\.,%\*=-])", r"\1\2"),
- ]
- # for p,s in patts: tks = re.sub(p, s, tks)
-
# if split chars is part of token
res = []
tks = re.sub(r"[ ]+", " ", tks).split()
@@ -284,7 +277,8 @@ class RagTokenizer:
same = 0
while i + same < len(tks1) and j + same < len(tks) and tks1[i + same] == tks[j + same]:
same += 1
- if same > 0: res.append(" ".join(tks[j: j + same]))
+ if same > 0:
+ res.append(" ".join(tks[j: j + same]))
_i = i + same
_j = j + same
j = _j + 1
diff --git a/rag/nlp/term_weight.py b/rag/nlp/term_weight.py
index bbf446ddfb63644b4b609e712225d0bc39529299..6ab49a2e3c950e7cdb8a0c3c35f915489245dbac 100644
--- a/rag/nlp/term_weight.py
+++ b/rag/nlp/term_weight.py
@@ -62,10 +62,10 @@ class Dealer:
res = {}
f = open(fnm, "r")
while True:
- l = f.readline()
- if not l:
+ line = f.readline()
+ if not line:
break
- arr = l.replace("\n", "").split("\t")
+ arr = line.replace("\n", "").split("\t")
if len(arr) < 2:
res[arr[0]] = 0
else:
diff --git a/rag/raptor.py b/rag/raptor.py
index 51f1ad1177d20cad6105d15e776ea93f13b59319..6e11cf683e57c56bcc6777451ce8e55db4d7cfe5 100644
--- a/rag/raptor.py
+++ b/rag/raptor.py
@@ -47,7 +47,8 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
def __call__(self, chunks, random_state, callback=None):
layers = [(0, len(chunks))]
start, end = 0, len(chunks)
- if len(chunks) <= 1: return
+ if len(chunks) <= 1:
+ return
chunks = [(s, a) for s, a in chunks if len(a) > 0]
def summarize(ck_idx, lock):
@@ -66,7 +67,8 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
logging.debug(f"SUM: {cnt}")
embds, _ = self._embd_model.encode([cnt])
with lock:
- if not len(embds[0]): return
+ if not len(embds[0]):
+ return
chunks.append((cnt, embds[0]))
except Exception as e:
logging.exception("summarize got exception")
diff --git a/rag/svr/cache_file_svr.py b/rag/svr/cache_file_svr.py
index 98769d3b53261eb6db46ea85f491677466020cc6..8b96a2af5b7d9e4b843c643c23bd23d7c6f0892d 100644
--- a/rag/svr/cache_file_svr.py
+++ b/rag/svr/cache_file_svr.py
@@ -33,14 +33,16 @@ def collect():
def main():
locations = collect()
- if not locations:return
+ if not locations:
+ return
logging.info(f"TASKS: {len(locations)}")
for kb_id, loc in locations:
try:
if REDIS_CONN.is_alive():
try:
key = "{}/{}".format(kb_id, loc)
- if REDIS_CONN.exist(key):continue
+ if REDIS_CONN.exist(key):
+ continue
file_bin = STORAGE_IMPL.get(kb_id, loc)
REDIS_CONN.transaction(key, file_bin, 12 * 60)
logging.info("CACHE: {}".format(loc))
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index 7ab4b72540018620403ad4f1584cae3a56be0d00..aed8b7fa4a3a56276235047d183223ffe8e50e84 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -23,18 +23,12 @@ import os
from api.utils.log_utils import initRootLogger
-CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1]
-CONSUMER_NAME = "task_executor_" + CONSUMER_NO
-LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
-initRootLogger(CONSUMER_NAME, LOG_LEVELS)
from datetime import datetime
import json
-import os
import hashlib
import copy
import re
-import sys
import time
import threading
from functools import partial
@@ -63,6 +57,11 @@ from rag.utils import rmSpace, num_tokens_from_string
from rag.utils.redis_conn import REDIS_CONN, Payload
from rag.utils.storage_factory import STORAGE_IMPL
+CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1]
+CONSUMER_NAME = "task_executor_" + CONSUMER_NO
+LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
+initRootLogger(CONSUMER_NAME, LOG_LEVELS)
+
BATCH_SIZE = 64
FACTORY = {
@@ -201,7 +200,8 @@ def build_chunks(task, progress_callback):
"doc_id": task["doc_id"],
"kb_id": str(task["kb_id"])
}
- if task["pagerank"]: doc["pagerank_fea"] = int(task["pagerank"])
+ if task["pagerank"]:
+ doc["pagerank_fea"] = int(task["pagerank"])
el = 0
for ck in cks:
d = copy.deepcopy(doc)
@@ -342,7 +342,8 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None):
"docnm_kwd": row["name"],
"title_tks": rag_tokenizer.tokenize(row["name"])
}
- if row["pagerank"]: doc["pagerank_fea"] = int(row["pagerank"])
+ if row["pagerank"]:
+ doc["pagerank_fea"] = int(row["pagerank"])
res = []
tk_count = 0
for content, vctr in chunks[original_length:]:
diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py
index d75fb69c3deb79692e2eb00bafcbd6867d296403..e68e437354f691b91b0b2baf0d2019796b81974f 100644
--- a/rag/utils/__init__.py
+++ b/rag/utils/__init__.py
@@ -41,15 +41,15 @@ def findMaxDt(fnm):
try:
with open(fnm, "r") as f:
while True:
- l = f.readline()
- if not l:
+ line = f.readline()
+ if not line:
break
- l = l.strip("\n")
- if l == 'nan':
+ line = line.strip("\n")
+ if line == 'nan':
continue
- if l > m:
- m = l
- except Exception as e:
+ if line > m:
+ m = line
+ except Exception:
pass
return m
@@ -59,15 +59,15 @@ def findMaxTm(fnm):
try:
with open(fnm, "r") as f:
while True:
- l = f.readline()
- if not l:
+ line = f.readline()
+ if not line:
break
- l = l.strip("\n")
- if l == 'nan':
+ line = line.strip("\n")
+ if line == 'nan':
continue
- if int(l) > m:
- m = int(l)
- except Exception as e:
+ if int(line) > m:
+ m = int(line)
+ except Exception:
pass
return m
diff --git a/rag/utils/azure_sas_conn.py b/rag/utils/azure_sas_conn.py
index 275f7fe6b18c07fa4c5d4dbd897d8624a60c3d3f..8c1be4cd05786614a2c95a056734d1d70278687f 100644
--- a/rag/utils/azure_sas_conn.py
+++ b/rag/utils/azure_sas_conn.py
@@ -32,7 +32,7 @@ class RAGFlowAzureSasBlob(object):
self.conn = None
def health(self):
- bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
+ _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary))
def put(self, bucket, fnm, binary):
diff --git a/rag/utils/azure_spn_conn.py b/rag/utils/azure_spn_conn.py
index 7081f892d1a7408d28eaa040db9eb7cff3150ce1..c45ceb79527d333540d559df157f712be4c26d44 100644
--- a/rag/utils/azure_spn_conn.py
+++ b/rag/utils/azure_spn_conn.py
@@ -36,7 +36,7 @@ class RAGFlowAzureSpnBlob(object):
self.conn = None
def health(self):
- bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
+ _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
f = self.conn.create_file(fnm)
f.append_data(binary, offset=0, length=len(binary))
return f.flush_data(len(binary))
diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py
index 059d4f00f295fa431c50f862b3aef3a8be0e3e96..a473833f4572d95904546d84aeca37c9865a4f74 100644
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@@ -132,7 +132,8 @@ class ESConnection(DocStoreConnection):
bqry.filter.append(
Q("bool", must_not=Q("range", available_int={"lt": 1})))
continue
- if not v: continue
+ if not v:
+ continue
if isinstance(v, list):
bqry.filter.append(Q("terms", **{k: v}))
elif isinstance(v, str) or isinstance(v, int):
diff --git a/sdk/python/ragflow_sdk/__init__.py b/sdk/python/ragflow_sdk/__init__.py
index a99e97c6beabbf19635ff392915ac1d91b215d93..f8df6aaf056df12727d90284659fd44a51b337df 100644
--- a/sdk/python/ragflow_sdk/__init__.py
+++ b/sdk/python/ragflow_sdk/__init__.py
@@ -1,14 +1,21 @@
-from beartype.claw import beartype_this_package
-beartype_this_package() # <-- raise exceptions in your code
-
import importlib.metadata
-__version__ = importlib.metadata.version("ragflow_sdk")
-
from .ragflow import RAGFlow
from .modules.dataset import DataSet
from .modules.chat import Chat
from .modules.session import Session
from .modules.document import Document
from .modules.chunk import Chunk
-from .modules.agent import Agent
\ No newline at end of file
+from .modules.agent import Agent
+
+__version__ = importlib.metadata.version("ragflow_sdk")
+
+__all__ = [
+ "RAGFlow",
+ "DataSet",
+ "Chat",
+ "Session",
+ "Document",
+ "Chunk",
+ "Agent"
+]
\ No newline at end of file
diff --git a/sdk/python/ragflow_sdk/modules/session.py b/sdk/python/ragflow_sdk/modules/session.py
index f5c2072ee5f62ceea79942038a896f0e0821a242..539c1ce89bad80c0cb449346e956359b25623c5e 100644
--- a/sdk/python/ragflow_sdk/modules/session.py
+++ b/sdk/python/ragflow_sdk/modules/session.py
@@ -29,7 +29,7 @@ class Session(Base):
raise Exception(json_data["message"])
if line.startswith("data:"):
json_data = json.loads(line[5:])
- if json_data["data"] != True:
+ if json_data["data"] is not True:
answer = json_data["data"]["answer"]
reference = json_data["data"]["reference"]
temp_dict = {
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
index cd4df05f3837290c02068cf2f165034fc2c4ebe3..463d0a8813396f3431df0da4fb060d06e5d75266 100644
--- a/sdk/python/test/conftest.py
+++ b/sdk/python/test/conftest.py
@@ -1,5 +1,3 @@
-import string
-import random
import os
import pytest
import requests
diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py
index 4e98357635be205445873d4c19ac48d696d868d9..1a8c174c53da4f9bb9757f4876257416e610e816 100644
--- a/sdk/python/test/test_frontend_api/common.py
+++ b/sdk/python/test/test_frontend_api/common.py
@@ -39,7 +39,6 @@ def update_dataset(auth, json_req):
def upload_file(auth, dataset_id, path):
authorization = {"Authorization": auth}
url = f"{HOST_ADDRESS}/v1/document/upload"
- base_name = os.path.basename(path)
json_req = {
"kb_id": dataset_id,
}
diff --git a/sdk/python/test/test_frontend_api/get_email.py b/sdk/python/test/test_frontend_api/get_email.py
index df053fa768b636e0666c177036b10c5afbe5edcc..923b82d2d21ad75ebe0fd0556a47fd1e65180a67 100644
--- a/sdk/python/test/test_frontend_api/get_email.py
+++ b/sdk/python/test/test_frontend_api/get_email.py
@@ -1,3 +1,3 @@
def test_get_email(get_email):
- print(f"\nEmail account:",flush=True)
+ print("\nEmail account:",flush=True)
print(f"{get_email}\n",flush=True)
\ No newline at end of file
diff --git a/sdk/python/test/test_frontend_api/test_chunk.py b/sdk/python/test/test_frontend_api/test_chunk.py
index 555b9360198a78bd09c2a5c0a92509b8cc69d4cc..afcab865d5a64eca9d9a0507d069d1b7f02e2231 100644
--- a/sdk/python/test/test_frontend_api/test_chunk.py
+++ b/sdk/python/test/test_frontend_api/test_chunk.py
@@ -13,14 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, upload_file, DATASET_NAME_LIMIT
+from common import create_dataset, list_dataset, rm_dataset, upload_file
from common import list_document, get_docs_info, parse_docs
from time import sleep
from timeit import default_timer as timer
-import re
-import pytest
-import random
-import string
def test_parse_txt_document(get_auth):
diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py
index 8de82282935c2a8b7780296de0ec58a6875dfb03..fe2de715a8c4f72c59bee6b7a65bef1c49360e6e 100644
--- a/sdk/python/test/test_frontend_api/test_dataset.py
+++ b/sdk/python/test/test_frontend_api/test_dataset.py
@@ -1,6 +1,5 @@
-from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT
+from common import create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT
import re
-import pytest
import random
import string
@@ -33,8 +32,6 @@ def test_dataset(get_auth):
def test_dataset_1k_dataset(get_auth):
# create dataset
- authorization = {"Authorization": get_auth}
- url = f"{HOST_ADDRESS}/v1/kb/create"
for i in range(1000):
res = create_dataset(get_auth, f"test_create_dataset_{i}")
assert res.get("code") == 0, f"{res.get('message')}"
@@ -76,7 +73,7 @@ def test_duplicated_name_dataset(get_auth):
dataset_id = item.get("id")
dataset_list.append(dataset_id)
match = re.match(pattern, dataset_name)
- assert match != None
+ assert match is not None
for dataset_id in dataset_list:
res = rm_dataset(get_auth, dataset_id)
diff --git a/sdk/python/test/test_sdk_api/get_email.py b/sdk/python/test/test_sdk_api/get_email.py
index df053fa768b636e0666c177036b10c5afbe5edcc..923b82d2d21ad75ebe0fd0556a47fd1e65180a67 100644
--- a/sdk/python/test/test_sdk_api/get_email.py
+++ b/sdk/python/test/test_sdk_api/get_email.py
@@ -1,3 +1,3 @@
def test_get_email(get_email):
- print(f"\nEmail account:",flush=True)
+ print("\nEmail account:",flush=True)
print(f"{get_email}\n",flush=True)
\ No newline at end of file
diff --git a/sdk/python/test/test_sdk_api/t_agent.py b/sdk/python/test/test_sdk_api/t_agent.py
index 2c092e85a1c47cf9e4d82ee196661ec9914e2ba1..f9f29c7e32d10ce912b04c3cbea460edfb80427a 100644
--- a/sdk/python/test/test_sdk_api/t_agent.py
+++ b/sdk/python/test/test_sdk_api/t_agent.py
@@ -1,4 +1,4 @@
-from ragflow_sdk import RAGFlow,Agent
+from ragflow_sdk import RAGFlow
from common import HOST_ADDRESS
import pytest