KevinHuSh
committed on
Commit
·
f1ccc7f
1
Parent(s):
79ada0b
README refined (#156)
Browse files- README.md +14 -13
- rag/app/naive.py +4 -2
- rag/nlp/search.py +1 -1
README.md
CHANGED
@@ -79,10 +79,6 @@ vm.max_map_count=262144
|
|
79 |
If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
|
80 |
|
81 |
## Quick Start
|
82 |
-
> If you want to change the basic settings, such as the port or password, please refer to [.env](./docker/.env) before starting the system.
|
83 |
-
|
84 |
-
> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
|
85 |
-
> configuration of the back-end service and should be consistent with [.env](./docker/.env).
|
86 |
|
87 |
> - In [service_conf.yaml](./docker/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended.
|
88 |
> In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
|
@@ -91,25 +87,25 @@ If your machine doesn't have *Docker* installed, please refer to [Install Docker
|
|
91 |
> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model),
|
92 |
> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
|
93 |
```bash
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
```
|
98 |
### OR
|
99 |
|
100 |
```bash
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
```
|
107 |
> The core image is about 15 GB, so please be patient the first time you pull it.
|
108 |
|
109 |
After pulling all the images and starting the containers, use the following command to check the server status. If you see the following output,
|
110 |
_**Hallelujah!**_ You have successfully launched the system.
|
111 |
```bash
|
112 |
-
|
113 |
|
114 |
____ ______ __
|
115 |
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
@@ -139,6 +135,11 @@ If you need to change the default setting of the system when you deploy it. Ther
|
|
139 |
Please refer to [README](./docker/README.md) and manually set the configuration.
|
140 |
After changing something, please run *docker-compose up -d* again.
|
141 |
|
|
|
|
|
|
|
|
|
|
|
142 |
# RoadMap
|
143 |
|
144 |
- [ ] File manager.
|
|
|
79 |
If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
|
80 |
|
81 |
## Quick Start
|
|
|
|
|
|
|
|
|
82 |
|
83 |
> - In [service_conf.yaml](./docker/service_conf.yaml), configuration of *LLM* in **user_default_llm** is strongly recommended.
|
84 |
> In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
|
|
|
87 |
> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model),
|
88 |
> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
|
89 |
```bash
|
90 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
91 |
+
$ cd ragflow/docker
|
92 |
+
$ docker compose up -d
|
93 |
```
|
94 |
### OR
|
95 |
|
96 |
```bash
|
97 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
98 |
+
$ cd ragflow/
|
99 |
+
$ docker build -t infiniflow/ragflow:v1.0 .
|
100 |
+
$ cd docker
|
101 |
+
$ docker compose up -d
|
102 |
```
|
103 |
> The core image is about 15 GB, so please be patient the first time you pull it.
|
104 |
|
105 |
After pulling all the images and starting the containers, use the following command to check the server status. If you see the following output,
|
106 |
_**Hallelujah!**_ You have successfully launched the system.
|
107 |
```bash
|
108 |
+
$ docker logs -f ragflow-server
|
109 |
|
110 |
____ ______ __
|
111 |
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
|
|
135 |
Please refer to [README](./docker/README.md) and manually set the configuration.
|
136 |
After changing something, please run *docker-compose up -d* again.
|
137 |
|
138 |
+
> If you want to change the basic settings, such as the port or password, please refer to [.env](./docker/.env) before starting the system.
|
139 |
+
|
140 |
+
> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
|
141 |
+
> configuration of the back-end service and should be consistent with [.env](./docker/.env).
|
142 |
+
|
143 |
# RoadMap
|
144 |
|
145 |
- [ ] File manager.
|
rag/app/naive.py
CHANGED
@@ -42,7 +42,9 @@ class Pdf(PdfParser):
|
|
42 |
self._text_merge()
|
43 |
callback(0.67, "Text merging finished")
|
44 |
tbls = self._extract_table_figure(True, zoomin, True, True)
|
45 |
-
self._naive_vertical_merge()
|
|
|
|
|
46 |
|
47 |
cron_logger.info("paddle layouts:".format(
|
48 |
(timer() - start) / (self.total_page + 0.1)))
|
@@ -79,7 +81,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
79 |
|
80 |
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
81 |
pdf_parser = Pdf(
|
82 |
-
) if parser_config
|
83 |
sections, tbls = pdf_parser(filename if not binary else binary,
|
84 |
from_page=from_page, to_page=to_page, callback=callback)
|
85 |
res = tokenize_table(tbls, doc, eng)
|
|
|
42 |
self._text_merge()
|
43 |
callback(0.67, "Text merging finished")
|
44 |
tbls = self._extract_table_figure(True, zoomin, True, True)
|
45 |
+
#self._naive_vertical_merge()
|
46 |
+
self._concat_downward()
|
47 |
+
#self._filter_forpages()
|
48 |
|
49 |
cron_logger.info("paddle layouts:".format(
|
50 |
(timer() - start) / (self.total_page + 0.1)))
|
|
|
81 |
|
82 |
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
83 |
pdf_parser = Pdf(
|
84 |
+
) if parser_config.get("layout_recognize", True) else PlainParser()
|
85 |
sections, tbls = pdf_parser(filename if not binary else binary,
|
86 |
from_page=from_page, to_page=to_page, callback=callback)
|
87 |
res = tokenize_table(tbls, doc, eng)
|
rag/nlp/search.py
CHANGED
@@ -7,7 +7,6 @@ from elasticsearch_dsl import Q, Search
|
|
7 |
from typing import List, Optional, Dict, Union
|
8 |
from dataclasses import dataclass
|
9 |
|
10 |
-
from api.settings import chat_logger
|
11 |
from rag.settings import es_logger
|
12 |
from rag.utils import rmSpace
|
13 |
from rag.nlp import huqie, query
|
@@ -365,6 +364,7 @@ class Dealer:
|
|
365 |
return ranks
|
366 |
|
367 |
def sql_retrieval(self, sql, fetch_size=128, format="json"):
|
|
|
368 |
sql = re.sub(r"[ ]+", " ", sql)
|
369 |
sql = sql.replace("%", "")
|
370 |
es_logger.info(f"Get es sql: {sql}")
|
|
|
7 |
from typing import List, Optional, Dict, Union
|
8 |
from dataclasses import dataclass
|
9 |
|
|
|
10 |
from rag.settings import es_logger
|
11 |
from rag.utils import rmSpace
|
12 |
from rag.nlp import huqie, query
|
|
|
364 |
return ranks
|
365 |
|
366 |
def sql_retrieval(self, sql, fetch_size=128, format="json"):
|
367 |
+
from api.settings import chat_logger
|
368 |
sql = re.sub(r"[ ]+", " ", sql)
|
369 |
sql = sql.replace("%", "")
|
370 |
es_logger.info(f"Get es sql: {sql}")
|