Commit 
							
							·
						
						7dd2893
	
1
								Parent(s):
							
							c60dccb
								
Update info (#1005)
Browse files### What problem does this PR solve?
_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._
### Type of change
- [x] Refactoring
Signed-off-by: Jin Hai <[email protected]>
- rag/app/manual.py +16 -0
- rag/llm/rpc_server.py +16 -0
- rag/nlp/__init__.py +16 -0
- rag/nlp/query.py +15 -1
- rag/nlp/rag_tokenizer.py +15 -1
- rag/nlp/search.py +16 -1
- rag/nlp/surname.py +16 -1
- rag/nlp/synonym.py +16 -0
- rag/nlp/term_weight.py +16 -1
- rag/svr/cache_file_svr.py +15 -0
- rag/utils/__init__.py +16 -0
    	
        rag/app/manual.py
    CHANGED
    
    | @@ -1,3 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import copy
         | 
| 2 | 
             
            import re
         | 
| 3 |  | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import copy
         | 
| 18 | 
             
            import re
         | 
| 19 |  | 
    	
        rag/llm/rpc_server.py
    CHANGED
    
    | @@ -1,3 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import argparse
         | 
| 2 | 
             
            import pickle
         | 
| 3 | 
             
            import random
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import argparse
         | 
| 18 | 
             
            import pickle
         | 
| 19 | 
             
            import random
         | 
    	
        rag/nlp/__init__.py
    CHANGED
    
    | @@ -1,3 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import random
         | 
| 2 | 
             
            from collections import Counter
         | 
| 3 |  | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import random
         | 
| 18 | 
             
            from collections import Counter
         | 
| 19 |  | 
    	
        rag/nlp/query.py
    CHANGED
    
    | @@ -1,4 +1,18 @@ | |
| 1 | 
            -
            # | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 |  | 
| 3 | 
             
            import json
         | 
| 4 | 
             
            import math
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 |  | 
| 17 | 
             
            import json
         | 
| 18 | 
             
            import math
         | 
    	
        rag/nlp/rag_tokenizer.py
    CHANGED
    
    | @@ -1,4 +1,18 @@ | |
| 1 | 
            -
            # | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 |  | 
| 3 | 
             
            import copy
         | 
| 4 | 
             
            import datrie
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 |  | 
| 17 | 
             
            import copy
         | 
| 18 | 
             
            import datrie
         | 
    	
        rag/nlp/search.py
    CHANGED
    
    | @@ -1,4 +1,19 @@ | |
| 1 | 
            -
            # | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 | 
             
            import json
         | 
| 3 | 
             
            import re
         | 
| 4 | 
             
            from copy import deepcopy
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import json
         | 
| 18 | 
             
            import re
         | 
| 19 | 
             
            from copy import deepcopy
         | 
    	
        rag/nlp/surname.py
    CHANGED
    
    | @@ -1,4 +1,19 @@ | |
| 1 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 | 
             
            m = set(["赵","钱","孙","李",
         | 
| 3 | 
             
            "周","吴","郑","王",
         | 
| 4 | 
             
            "冯","陈","褚","卫",
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            m = set(["赵","钱","孙","李",
         | 
| 18 | 
             
            "周","吴","郑","王",
         | 
| 19 | 
             
            "冯","陈","褚","卫",
         | 
    	
        rag/nlp/synonym.py
    CHANGED
    
    | @@ -1,3 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import json
         | 
| 2 | 
             
            import os
         | 
| 3 | 
             
            import time
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import json
         | 
| 18 | 
             
            import os
         | 
| 19 | 
             
            import time
         | 
    	
        rag/nlp/term_weight.py
    CHANGED
    
    | @@ -1,4 +1,19 @@ | |
| 1 | 
            -
            # | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 | 
             
            import math
         | 
| 3 | 
             
            import json
         | 
| 4 | 
             
            import re
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import math
         | 
| 18 | 
             
            import json
         | 
| 19 | 
             
            import re
         | 
    	
        rag/svr/cache_file_svr.py
    CHANGED
    
    | @@ -1,3 +1,18 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import random
         | 
| 2 | 
             
            import time
         | 
| 3 | 
             
            import traceback
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
             
            import random
         | 
| 17 | 
             
            import time
         | 
| 18 | 
             
            import traceback
         | 
    	
        rag/utils/__init__.py
    CHANGED
    
    | @@ -1,3 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            import os
         | 
| 2 | 
             
            import re
         | 
| 3 | 
             
            import tiktoken
         | 
|  | |
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            #  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            #  You may obtain a copy of the License at
         | 
| 7 | 
            +
            #
         | 
| 8 | 
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            #  limitations under the License.
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
             | 
| 17 | 
             
            import os
         | 
| 18 | 
             
            import re
         | 
| 19 | 
             
            import tiktoken
         |