KevinHuSh
committed on
Commit
·
f1ccc7f
1
Parent(s):
79ada0b
README refined (#156)
Browse files- README.md +14 -13
- rag/app/naive.py +4 -2
- rag/nlp/search.py +1 -1
README.md
CHANGED
|
@@ -79,10 +79,6 @@ vm.max_map_count=262144
|
|
| 79 |
If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
|
| 80 |
|
| 81 |
## Quick Start
|
| 82 |
-
> If you want to change the basic setup, such as the port or password, please refer to [.env](./docker/.env) before starting the system.
|
| 83 |
-
|
| 84 |
-
> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
|
| 85 |
-
> configuration of the back-end service and should be consistent with [.env](./docker/.env).
|
| 86 |
|
| 87 |
> - In [service_conf.yaml](./docker/service_conf.yaml), configuring the *LLM* in **user_default_llm** is strongly recommended.
|
| 88 |
> In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
|
|
@@ -91,25 +87,25 @@ If your machine doesn't have *Docker* installed, please refer to [Install Docker
|
|
| 91 |
> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model),
|
| 92 |
> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
|
| 93 |
```bash
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
```
|
| 98 |
### OR
|
| 99 |
|
| 100 |
```bash
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
```
|
| 107 |
> The core image is about 15GB, so please be patient the first time you pull it.
|
| 108 |
|
| 109 |
After pulling all the images and starting the containers, use the following command to check the server status. If you see the following output,
|
| 110 |
_**Hallelujah!**_ You have successfully launched the system.
|
| 111 |
```bash
|
| 112 |
-
|
| 113 |
|
| 114 |
____ ______ __
|
| 115 |
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
|
@@ -139,6 +135,11 @@ If you need to change the default setting of the system when you deploy it. Ther
|
|
| 139 |
Please refer to [README](./docker/README.md) and manually set the configuration.
|
| 140 |
After changing something, please run *docker-compose up -d* again.
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# RoadMap
|
| 143 |
|
| 144 |
- [ ] File manager.
|
|
|
|
| 79 |
If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
|
| 80 |
|
| 81 |
## Quick Start
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
> - In [service_conf.yaml](./docker/service_conf.yaml), configuring the *LLM* in **user_default_llm** is strongly recommended.
|
| 84 |
> In **user_default_llm** of [service_conf.yaml](./docker/service_conf.yaml), you need to specify LLM factory and your own _API_KEY_.
|
|
|
|
| 87 |
> [OpenAI](https://platform.openai.com/login?launch), [Tongyi-Qianwen](https://dashscope.console.aliyun.com/model),
|
| 88 |
> [ZHIPU-AI](https://open.bigmodel.cn/), [Moonshot](https://platform.moonshot.cn/docs/docs)
|
| 89 |
```bash
|
| 90 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
| 91 |
+
$ cd ragflow/docker
|
| 92 |
+
$ docker compose up -d
|
| 93 |
```
|
| 94 |
### OR
|
| 95 |
|
| 96 |
```bash
|
| 97 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
| 98 |
+
$ cd ragflow/
|
| 99 |
+
$ docker build -t infiniflow/ragflow:v1.0 .
|
| 100 |
+
$ cd docker
|
| 101 |
+
$ docker compose up -d
|
| 102 |
```
|
| 103 |
> The core image is about 15GB, so please be patient the first time you pull it.
|
| 104 |
|
| 105 |
After pulling all the images and starting the containers, use the following command to check the server status. If you see the following output,
|
| 106 |
_**Hallelujah!**_ You have successfully launched the system.
|
| 107 |
```bash
|
| 108 |
+
$ docker logs -f ragflow-server
|
| 109 |
|
| 110 |
____ ______ __
|
| 111 |
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
|
|
|
| 135 |
Please refer to [README](./docker/README.md) and manually set the configuration.
|
| 136 |
After changing something, please run *docker-compose up -d* again.
|
| 137 |
|
| 138 |
+
> If you want to change the basic setup, such as the port or password, please refer to [.env](./docker/.env) before starting the system.
|
| 139 |
+
|
| 140 |
+
> If you change anything in [.env](./docker/.env), please check [service_conf.yaml](./docker/service_conf.yaml) which is a
|
| 141 |
+
> configuration of the back-end service and should be consistent with [.env](./docker/.env).
|
| 142 |
+
|
| 143 |
# RoadMap
|
| 144 |
|
| 145 |
- [ ] File manager.
|
rag/app/naive.py
CHANGED
|
@@ -42,7 +42,9 @@ class Pdf(PdfParser):
|
|
| 42 |
self._text_merge()
|
| 43 |
callback(0.67, "Text merging finished")
|
| 44 |
tbls = self._extract_table_figure(True, zoomin, True, True)
|
| 45 |
-
self._naive_vertical_merge()
|
|
|
|
|
|
|
| 46 |
|
| 47 |
cron_logger.info("paddle layouts:".format(
|
| 48 |
(timer() - start) / (self.total_page + 0.1)))
|
|
@@ -79,7 +81,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
| 79 |
|
| 80 |
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
| 81 |
pdf_parser = Pdf(
|
| 82 |
-
) if parser_config
|
| 83 |
sections, tbls = pdf_parser(filename if not binary else binary,
|
| 84 |
from_page=from_page, to_page=to_page, callback=callback)
|
| 85 |
res = tokenize_table(tbls, doc, eng)
|
|
|
|
| 42 |
self._text_merge()
|
| 43 |
callback(0.67, "Text merging finished")
|
| 44 |
tbls = self._extract_table_figure(True, zoomin, True, True)
|
| 45 |
+
#self._naive_vertical_merge()
|
| 46 |
+
self._concat_downward()
|
| 47 |
+
#self._filter_forpages()
|
| 48 |
|
| 49 |
cron_logger.info("paddle layouts:".format(
|
| 50 |
(timer() - start) / (self.total_page + 0.1)))
|
|
|
|
| 81 |
|
| 82 |
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
| 83 |
pdf_parser = Pdf(
|
| 84 |
+
) if parser_config.get("layout_recognize", True) else PlainParser()
|
| 85 |
sections, tbls = pdf_parser(filename if not binary else binary,
|
| 86 |
from_page=from_page, to_page=to_page, callback=callback)
|
| 87 |
res = tokenize_table(tbls, doc, eng)
|
rag/nlp/search.py
CHANGED
|
@@ -7,7 +7,6 @@ from elasticsearch_dsl import Q, Search
|
|
| 7 |
from typing import List, Optional, Dict, Union
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
| 10 |
-
from api.settings import chat_logger
|
| 11 |
from rag.settings import es_logger
|
| 12 |
from rag.utils import rmSpace
|
| 13 |
from rag.nlp import huqie, query
|
|
@@ -365,6 +364,7 @@ class Dealer:
|
|
| 365 |
return ranks
|
| 366 |
|
| 367 |
def sql_retrieval(self, sql, fetch_size=128, format="json"):
|
|
|
|
| 368 |
sql = re.sub(r"[ ]+", " ", sql)
|
| 369 |
sql = sql.replace("%", "")
|
| 370 |
es_logger.info(f"Get es sql: {sql}")
|
|
|
|
| 7 |
from typing import List, Optional, Dict, Union
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
|
|
|
| 10 |
from rag.settings import es_logger
|
| 11 |
from rag.utils import rmSpace
|
| 12 |
from rag.nlp import huqie, query
|
|
|
|
| 364 |
return ranks
|
| 365 |
|
| 366 |
def sql_retrieval(self, sql, fetch_size=128, format="json"):
|
| 367 |
+
from api.settings import chat_logger
|
| 368 |
sql = re.sub(r"[ ]+", " ", sql)
|
| 369 |
sql = sql.replace("%", "")
|
| 370 |
es_logger.info(f"Get es sql: {sql}")
|