Add upload file by knowledge base name API. (#539)
### What problem does this PR solve?
Add upload file by knowledge base name API.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
---------
Co-authored-by: chrysanthemum-boy <[email protected]>
- api/apps/api_app.py +81 -1
- api/db/services/knowledgebase_service.py +17 -2
- docs/conversation_api.md +57 -1
api/apps/api_app.py
CHANGED
|
@@ -13,18 +13,28 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
|
|
|
|
|
|
| 16 |
from datetime import datetime, timedelta
|
| 17 |
from flask import request
|
| 18 |
from flask_login import login_required, current_user
|
|
|
|
|
|
|
| 19 |
from api.db.db_models import APIToken, API4Conversation
|
|
|
|
| 20 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
| 21 |
from api.db.services.dialog_service import DialogService, chat
|
|
|
|
|
|
|
| 22 |
from api.db.services.user_service import UserTenantService
|
| 23 |
from api.settings import RetCode
|
| 24 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
| 25 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
| 26 |
from itsdangerous import URLSafeTimedSerializer
|
| 27 |
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def generate_confirmation_token(tenent_id):
|
| 30 |
serializer = URLSafeTimedSerializer(tenent_id)
|
|
@@ -191,4 +201,74 @@ def get(conversation_id):
|
|
| 191 |
|
| 192 |
return get_json_result(data=conv.to_dict())
|
| 193 |
except Exception as e:
|
| 194 |
-
return server_error_response(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
+
import os
|
| 17 |
+
import re
|
| 18 |
from datetime import datetime, timedelta
|
| 19 |
from flask import request
|
| 20 |
from flask_login import login_required, current_user
|
| 21 |
+
|
| 22 |
+
from api.db import FileType, ParserType
|
| 23 |
from api.db.db_models import APIToken, API4Conversation
|
| 24 |
+
from api.db.services import duplicate_name
|
| 25 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
| 26 |
from api.db.services.dialog_service import DialogService, chat
|
| 27 |
+
from api.db.services.document_service import DocumentService
|
| 28 |
+
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 29 |
from api.db.services.user_service import UserTenantService
|
| 30 |
from api.settings import RetCode
|
| 31 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
| 32 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
| 33 |
from itsdangerous import URLSafeTimedSerializer
|
| 34 |
|
| 35 |
+
from api.utils.file_utils import filename_type, thumbnail
|
| 36 |
+
from rag.utils import MINIO
|
| 37 |
+
|
| 38 |
|
| 39 |
def generate_confirmation_token(tenent_id):
|
| 40 |
serializer = URLSafeTimedSerializer(tenent_id)
|
|
|
|
| 201 |
|
| 202 |
return get_json_result(data=conv.to_dict())
|
| 203 |
except Exception as e:
|
| 204 |
+
return server_error_response(e)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
@manager.route('/document/upload', methods=['POST'])
@validate_request("kb_name")
def upload():
    """Upload one file into the knowledge base identified by ``kb_name``.

    The request must carry an API token as the second whitespace-separated
    field of the ``Authorization`` header, a ``kb_name`` form field and a
    ``file`` multipart part. The knowledge base is looked up by name within
    the tenant that owns the token.

    Returns:
        A JSON response. On success ``data`` is the inserted document
        serialized with ``to_json()``. Failures are reported through
        ``get_json_result`` / ``get_data_error_result`` (bad token, unknown
        knowledge base, missing file, file quota exceeded, unsupported file
        type) or ``server_error_response`` for unexpected exceptions.
    """
    # A missing or malformed header must yield an authentication error
    # rather than an unhandled AttributeError/IndexError.
    auth_parts = (request.headers.get('Authorization') or '').split()
    if len(auth_parts) < 2:
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    objs = APIToken.query(token=auth_parts[1])
    if not objs:
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    # ``kb_name`` may be absent from the form (e.g. sent in another part of
    # the request); an empty name then simply fails the lookup below.
    kb_name = (request.form.get("kb_name") or "").strip()
    tenant_id = objs[0].tenant_id

    try:
        found, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
        if not found:
            return get_data_error_result(
                retmsg="Can't find this knowledgebase!")
    except Exception as exc:
        return server_error_response(exc)

    if 'file' not in request.files:
        return get_json_result(
            data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

    upload_file = request.files['file']
    if upload_file.filename == '':
        return get_json_result(
            data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

    try:
        max_files = int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192))
        if DocumentService.get_doc_count(kb.tenant_id) >= max_files:
            return get_data_error_result(
                retmsg="Exceed the maximum file number of a free user!")

        # duplicate_name picks the document name to store, given the
        # existing documents of this knowledge base.
        filename = duplicate_name(
            DocumentService.query,
            name=upload_file.filename,
            kb_id=kb.id)
        filetype = filename_type(filename)
        if not filetype:
            return get_data_error_result(
                retmsg="This type of file has not been supported yet!")

        # Append "_" until MINIO reports no existing object under that name
        # for this kb_id, so an existing blob is never overwritten.
        location = filename
        while MINIO.obj_exist(kb.id, location):
            location += "_"

        blob = upload_file.read()
        MINIO.put(kb.id, location, blob)

        doc = {
            "id": get_uuid(),
            "kb_id": kb.id,
            "parser_id": kb.parser_id,
            "parser_config": kb.parser_config,
            "created_by": kb.tenant_id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
            "thumbnail": thumbnail(filename, blob),
        }
        # File-kind specific parsers override the knowledge base default;
        # the presentation check runs last, so it wins if both match.
        if doc["type"] == FileType.VISUAL:
            doc["parser_id"] = ParserType.PICTURE.value
        if re.search(r"\.(ppt|pptx|pages)$", filename):
            doc["parser_id"] = ParserType.PRESENTATION.value

        doc = DocumentService.insert(doc)
        return get_json_result(data=doc.to_json())
    except Exception as exc:
        return server_error_response(exc)
|
api/db/services/knowledgebase_service.py
CHANGED
|
@@ -27,7 +27,8 @@ class KnowledgebaseService(CommonService):
|
|
| 27 |
page_number, items_per_page, orderby, desc):
|
| 28 |
kbs = cls.model.select().where(
|
| 29 |
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
| 30 |
-
|
|
|
|
| 31 |
& (cls.model.status == StatusEnum.VALID.value)
|
| 32 |
)
|
| 33 |
if desc:
|
|
@@ -56,7 +57,8 @@ class KnowledgebaseService(CommonService):
|
|
| 56 |
cls.model.chunk_num,
|
| 57 |
cls.model.parser_id,
|
| 58 |
cls.model.parser_config]
|
| 59 |
-
kbs = cls.model.select(*fields).join(Tenant, on=(
|
|
|
|
| 60 |
(cls.model.id == kb_id),
|
| 61 |
(cls.model.status == StatusEnum.VALID.value)
|
| 62 |
)
|
|
@@ -86,6 +88,7 @@ class KnowledgebaseService(CommonService):
|
|
| 86 |
old[k] = list(set(old[k] + v))
|
| 87 |
else:
|
| 88 |
old[k] = v
|
|
|
|
| 89 |
dfs_update(m.parser_config, config)
|
| 90 |
cls.update_by_id(id, {"parser_config": m.parser_config})
|
| 91 |
|
|
@@ -97,3 +100,15 @@ class KnowledgebaseService(CommonService):
|
|
| 97 |
if k.parser_config and "field_map" in k.parser_config:
|
| 98 |
conf.update(k.parser_config["field_map"])
|
| 99 |
return conf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
page_number, items_per_page, orderby, desc):
|
| 28 |
kbs = cls.model.select().where(
|
| 29 |
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
| 30 |
+
TenantPermission.TEAM.value)) | (
|
| 31 |
+
cls.model.tenant_id == user_id))
|
| 32 |
& (cls.model.status == StatusEnum.VALID.value)
|
| 33 |
)
|
| 34 |
if desc:
|
|
|
|
| 57 |
cls.model.chunk_num,
|
| 58 |
cls.model.parser_id,
|
| 59 |
cls.model.parser_config]
|
| 60 |
+
kbs = cls.model.select(*fields).join(Tenant, on=(
|
| 61 |
+
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
|
| 62 |
(cls.model.id == kb_id),
|
| 63 |
(cls.model.status == StatusEnum.VALID.value)
|
| 64 |
)
|
|
|
|
| 88 |
old[k] = list(set(old[k] + v))
|
| 89 |
else:
|
| 90 |
old[k] = v
|
| 91 |
+
|
| 92 |
dfs_update(m.parser_config, config)
|
| 93 |
cls.update_by_id(id, {"parser_config": m.parser_config})
|
| 94 |
|
|
|
|
| 100 |
if k.parser_config and "field_map" in k.parser_config:
|
| 101 |
conf.update(k.parser_config["field_map"])
|
| 102 |
return conf
|
| 103 |
+
|
| 104 |
+
@classmethod
@DB.connection_context()
def get_by_name(cls, kb_name, tenant_id):
    """Look up a valid knowledge base by name within one tenant.

    Method of ``KnowledgebaseService``. Selects rows of ``cls.model`` whose
    ``name`` equals ``kb_name``, whose ``tenant_id`` equals ``tenant_id`` and
    whose ``status`` is ``StatusEnum.VALID.value``.

    Returns:
        ``(True, row)`` with the first matching row, or ``(False, None)``
        when nothing matches.
    """
    model = cls.model
    matches = model.select().where(
        (model.name == kb_name)
        & (model.tenant_id == tenant_id)
        & (model.status == StatusEnum.VALID.value)
    )
    # Guard clause: report "not found" first, then hand back the first hit.
    if not matches:
        return False, None
    return True, matches[0]
|
docs/conversation_api.md
CHANGED
|
@@ -303,5 +303,61 @@ This will be called to get the answer to users' questions.
|
|
| 303 |
## Get document content or image
|
| 304 |
|
| 305 |
This is usually used when display content of citation.
|
| 306 |
-
### Path: /document/get/\<id\>
|
| 307 |
### Method: GET
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
## Get document content or image
|
| 304 |
|
| 305 |
This is usually used when display content of citation.
|
| 306 |
+
### Path: /api/document/get/\<id\>
|
| 307 |
### Method: GET
|
| 308 |
+
|
| 309 |
+
## Upload file
|
| 310 |
+
|
| 311 |
+
This is used to upload a file to a knowledge base.
|
| 312 |
+
### Path: /api/document/upload
|
| 313 |
+
### Method: POST
|
| 314 |
+
|
| 315 |
+
### Parameter:
|
| 316 |
+
|
| 317 |
+
| name | type | optional | description |
|
| 318 |
+
|---------|--------|----------|----------------------------------------|
|
| 319 |
+
| file | file | No | Upload file. |
|
| 320 |
+
| kb_name | string | No | Choose the upload knowledge base name. |
|
| 321 |
+
|
| 322 |
+
### Response
|
| 323 |
+
```json
|
| 324 |
+
{
|
| 325 |
+
"data": {
|
| 326 |
+
"chunk_num": 0,
|
| 327 |
+
"create_date": "Thu, 25 Apr 2024 14:30:06 GMT",
|
| 328 |
+
"create_time": 1714026606921,
|
| 329 |
+
"created_by": "553ec818fd5711ee8ea63043d7ed348e",
|
| 330 |
+
"id": "41e9324602cd11ef9f5f3043d7ed348e",
|
| 331 |
+
"kb_id": "06802686c0a311ee85d6246e9694c130",
|
| 332 |
+
"location": "readme.txt",
|
| 333 |
+
"name": "readme.txt",
|
| 334 |
+
"parser_config": {
|
| 335 |
+
"field_map": {
|
| 336 |
+
},
|
| 337 |
+
"pages": [
|
| 338 |
+
[
|
| 339 |
+
0,
|
| 340 |
+
1000000
|
| 341 |
+
]
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
"parser_id": "general",
|
| 345 |
+
"process_begin_at": null,
|
| 346 |
+
"process_duation": 0.0,
|
| 347 |
+
"progress": 0.0,
|
| 348 |
+
"progress_msg": "",
|
| 349 |
+
"run": "0",
|
| 350 |
+
"size": 929,
|
| 351 |
+
"source_type": "local",
|
| 352 |
+
"status": "1",
|
| 353 |
+
"thumbnail": null,
|
| 354 |
+
"token_num": 0,
|
| 355 |
+
"type": "doc",
|
| 356 |
+
"update_date": "Thu, 25 Apr 2024 14:30:06 GMT",
|
| 357 |
+
"update_time": 1714026606921
|
| 358 |
+
},
|
| 359 |
+
"retcode": 0,
|
| 360 |
+
"retmsg": "success"
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
```
|