Commit
·
43bceb7
1
Parent(s):
8446e15
Fix parsing JSON file error (#3829)
Browse files
### What problem does this PR solve?
Close issue: #3828
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Signed-off-by: jinhai <[email protected]>
- deepdoc/parser/json_parser.py +2 -2
- rag/app/naive.py +2 -1
deepdoc/parser/json_parser.py
CHANGED
|
@@ -92,9 +92,9 @@ class RAGFlowJsonParser:
|
|
| 92 |
"""Splits JSON into a list of JSON chunks"""
|
| 93 |
|
| 94 |
if convert_lists:
|
| 95 |
-
chunks = self._json_split(self._list_to_dict_preprocessing(json_data))
|
| 96 |
else:
|
| 97 |
-
chunks = self._json_split(json_data)
|
| 98 |
|
| 99 |
# Remove the last chunk if it's empty
|
| 100 |
if not chunks[-1]:
|
|
|
|
| 92 |
"""Splits JSON into a list of JSON chunks"""
|
| 93 |
|
| 94 |
if convert_lists:
|
| 95 |
+
chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None)
|
| 96 |
else:
|
| 97 |
+
chunks = self._json_split(json_data, None, None)
|
| 98 |
|
| 99 |
# Remove the last chunk if it's empty
|
| 100 |
if not chunks[-1]:
|
rag/app/naive.py
CHANGED
|
@@ -258,7 +258,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
| 258 |
|
| 259 |
elif re.search(r"\.json$", filename, re.IGNORECASE):
|
| 260 |
callback(0.1, "Start to parse.")
|
| 261 |
-
|
|
|
|
| 262 |
sections = [(_, "") for _ in sections if _]
|
| 263 |
callback(0.8, "Finish parsing.")
|
| 264 |
|
|
|
|
| 258 |
|
| 259 |
elif re.search(r"\.json$", filename, re.IGNORECASE):
|
| 260 |
callback(0.1, "Start to parse.")
|
| 261 |
+
chunk_token_num = int(parser_config.get("chunk_token_num", 128))
|
| 262 |
+
sections = JsonParser(chunk_token_num)(binary)
|
| 263 |
sections = [(_, "") for _ in sections if _]
|
| 264 |
callback(0.8, "Finish parsing.")
|
| 265 |
|