Kevin Hu
committed on
Commit
·
1f75d02
1
Parent(s):
2118d99
fix parameter error (#1925)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
deepdoc/parser/txt_parser.py
CHANGED
|
@@ -15,7 +15,7 @@ from rag.nlp import find_codec,num_tokens_from_string
|
|
| 15 |
import re
|
| 16 |
|
| 17 |
class RAGFlowTxtParser:
|
| 18 |
-
def __call__(self, fnm, binary=None, chunk_token_num=128):
|
| 19 |
txt = ""
|
| 20 |
if binary:
|
| 21 |
encoding = find_codec(binary)
|
|
@@ -27,7 +27,7 @@ class RAGFlowTxtParser:
|
|
| 27 |
if not l:
|
| 28 |
break
|
| 29 |
txt += l
|
| 30 |
-
return self.parser_txt(txt, chunk_token_num)
|
| 31 |
|
| 32 |
@classmethod
|
| 33 |
def parser_txt(cls, txt, chunk_token_num=128, delimiter="\n!?;。;!?"):
|
|
|
|
| 15 |
import re
|
| 16 |
|
| 17 |
class RAGFlowTxtParser:
|
| 18 |
+
def __call__(self, fnm, binary=None, chunk_token_num=128, delimiter="\n!?;。;!?"):
|
| 19 |
txt = ""
|
| 20 |
if binary:
|
| 21 |
encoding = find_codec(binary)
|
|
|
|
| 27 |
if not l:
|
| 28 |
break
|
| 29 |
txt += l
|
| 30 |
+
return self.parser_txt(txt, chunk_token_num, delimiter)
|
| 31 |
|
| 32 |
@classmethod
|
| 33 |
def parser_txt(cls, txt, chunk_token_num=128, delimiter="\n!?;。;!?"):
|