cecilia-uu committed
Commit 061aa4e · Parent(s): 2653e84

API: created list_doc (#1327)
### What problem does this PR solve?

Adds the API for listing documents.

### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/dataset_api.py +223 -2
- api/apps/documents_api.py +0 -228
- api/db/services/document_service.py +29 -0
- api/db/services/knowledgebase_service.py +3 -0
- docs/references/ragflow_api.md +3 -1
- sdk/python/ragflow/ragflow.py +17 -7
- sdk/python/test/test_document.py +135 -6
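
End to end, the feature is used through the Python SDK touched in this PR. A minimal sketch of the flow (assuming a running RAGFlow server; `API_KEY` and `HOST_ADDRESS` are placeholders, and the import path is assumed from the `sdk/python/ragflow/ragflow.py` layout):

```python
from ragflow.ragflow import RAGFlow  # import path assumed from the SDK layout

API_KEY = "<your_api_key>"              # placeholder credentials
HOST_ADDRESS = "http://127.0.0.1:9380"  # placeholder host address

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

# Create a dataset and put one document into it.
created = ragflow.create_dataset("demo_dataset")
dataset_id = created['data']['dataset_id']
ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])

# List the documents: offset/count paginate, order_by/descend sort, keywords filters by name.
response = ragflow.list_files(dataset_id, offset=0, count=-1,
                              order_by="create_time", descend=True, keywords="")
print(response['data']['total'], [doc['name'] for doc in response['data']['docs']])
```
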
api/apps/dataset_api.py
CHANGED
```diff
@@ -13,13 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+import re
+import warnings
 
 from flask import request
 from flask_login import login_required, current_user
 from httpx import HTTPError
 
 from api.contants import NAME_LENGTH_LIMIT
-from api.db import
+from api.db import FileType, ParserType, FileSource
+from api.db import StatusEnum
 from api.db.db_models import File
 from api.db.services import duplicate_name
 from api.db.services.document_service import DocumentService
@@ -29,8 +33,12 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.user_service import TenantService
 from api.settings import RetCode
 from api.utils import get_uuid
-from api.utils.api_utils import construct_json_result,
+from api.utils.api_utils import construct_json_result, construct_error_response
+from api.utils.api_utils import construct_result, validate_request
+from api.utils.file_utils import filename_type, thumbnail
+from rag.utils.minio_conn import MINIO
 
+MAXIMUM_OF_UPLOADING_FILES = 256
 
 # ------------------------------ create a dataset ---------------------------------------
 
@@ -253,3 +261,216 @@ def update_dataset(dataset_id):
         return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
     except Exception as e:
         return construct_error_response(e)
+
+# --------------------------------content management ----------------------------------------------
+
+# ----------------------------upload files-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/', methods=['POST'])
+@login_required
+def upload_documents(dataset_id):
+    # no files
+    if not request.files:
+        return construct_json_result(
+            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
+
+    # the number of uploading files exceeds the limit
+    file_objs = request.files.getlist('file')
+    num_file_objs = len(file_objs)
+
+    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
+        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
+                                     f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
+
+    for file_obj in file_objs:
+        # the content of the file
+        file_content = file_obj.read()
+        file_name = file_obj.filename
+        # no name
+        if not file_name:
+            return construct_json_result(
+                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
+
+        # TODO: support the remote files
+        if 'http' in file_name:
+            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
+
+        # the content is empty, raising a warning
+        if file_content == b'':
+            warnings.warn(f"[WARNING]: The file {file_name} is empty.")
+
+    # no dataset
+    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
+    if not exist:
+        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
+
+    # get the root_folder
+    root_folder = FileService.get_root_folder(current_user.id)
+    # get the id of the root_folder
+    parent_file_id = root_folder["id"]  # document id
+    # this is for the new user, create '.knowledgebase' file
+    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
+    # go inside this folder, get the kb_root_folder
+    kb_root_folder = FileService.get_kb_folder(current_user.id)
+    # link the file management to the kb_folder
+    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
+
+    # grab all the errs
+    err = []
+    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
+    uploaded_docs_json = []
+    for file in file_objs:
+        try:
+            # TODO: get this value from the database as some tenants have this limit while others don't
+            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
+                return construct_json_result(code=RetCode.DATA_ERROR,
+                                             message="Exceed the maximum file number of a free user!")
+            # deal with the duplicate name
+            filename = duplicate_name(
+                DocumentService.query,
+                name=file.filename,
+                kb_id=dataset.id)
+
+            # deal with the unsupported type
+            filetype = filename_type(filename)
+            if filetype == FileType.OTHER.value:
+                return construct_json_result(code=RetCode.DATA_ERROR,
+                                             message="This type of file has not been supported yet!")
+
+            # upload to the minio
+            location = filename
+            while MINIO.obj_exist(dataset_id, location):
+                location += "_"
+            blob = file.read()
+            MINIO.put(dataset_id, location, blob)
+            doc = {
+                "id": get_uuid(),
+                "kb_id": dataset.id,
+                "parser_id": dataset.parser_id,
+                "parser_config": dataset.parser_config,
+                "created_by": current_user.id,
+                "type": filetype,
+                "name": filename,
+                "location": location,
+                "size": len(blob),
+                "thumbnail": thumbnail(filename, blob)
+            }
+            if doc["type"] == FileType.VISUAL:
+                doc["parser_id"] = ParserType.PICTURE.value
+            if re.search(r"\.(ppt|pptx|pages)$", filename):
+                doc["parser_id"] = ParserType.PRESENTATION.value
+            DocumentService.insert(doc)
+
+            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
+            uploaded_docs_json.append(doc)
+        except Exception as e:
+            err.append(file.filename + ": " + str(e))
+
+    if err:
+        # return all the errors
+        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
+    # success
+    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
+
+
+# ----------------------------delete a file-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/<document_id>', methods=['DELETE'])
+@login_required
+def delete_document(document_id, dataset_id):  # string
+    # get the root folder
+    root_folder = FileService.get_root_folder(current_user.id)
+    # parent file's id
+    parent_file_id = root_folder["id"]
+    # consider the new user
+    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
+    # store all the errors that may have
+    errors = ""
+    try:
+        # whether there is this document
+        exist, doc = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
+        # whether this doc is authorized by this tenant
+        tenant_id = DocumentService.get_tenant_id(document_id)
+        if not tenant_id:
+            return construct_json_result(
+                message=f"You cannot delete this document {document_id} due to the authorization"
+                        f" reason!", code=RetCode.AUTHENTICATION_ERROR)
+
+        # get the doc's id and location
+        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
+
+        if real_dataset_id != dataset_id:
+            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
+                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
+
+        # there is an issue when removing
+        if not DocumentService.remove_document(doc, tenant_id):
+            return construct_json_result(
+                message="There was an error during the document removal process. Please check the status of the "
+                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
+
+        # fetch the File2Document record associated with the provided document ID.
+        file_to_doc = File2DocumentService.get_by_document_id(document_id)
+        # delete the associated File record.
+        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
+        # delete the File2Document record itself using the document ID. This removes the
+        # association between the document and the file after the File record has been deleted.
+        File2DocumentService.delete_by_document_id(document_id)
+
+        # delete it from minio
+        MINIO.rm(dataset_id, location)
+    except Exception as e:
+        errors += str(e)
+    if errors:
+        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
+
+    return construct_json_result(data=True, code=RetCode.SUCCESS)
+
+
+# ----------------------------list files-----------------------------------------------------
+@manager.route('/<dataset_id>/documents/', methods=['GET'])
+@login_required
+def list_documents(dataset_id):
+    if not dataset_id:
+        return construct_json_result(
+            data=False, message='Lack of "dataset_id"', code=RetCode.ARGUMENT_ERROR)
+
+    # searching keywords
+    keywords = request.args.get("keywords", "")
+
+    offset = request.args.get("offset", 0)
+    count = request.args.get("count", -1)
+    order_by = request.args.get("order_by", "create_time")
+    descend = request.args.get("descend", True)
+    try:
+        docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
+                                                                descend, keywords)
+
+        return construct_json_result(data={"total": total, "docs": docs}, message=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
+
+# ----------------------------download a file-----------------------------------------------------
+
+# ----------------------------enable rename-----------------------------------------------------
+
+# ----------------------------start parsing-----------------------------------------------------
+
+# ----------------------------stop parsing-----------------------------------------------------
+
+# ----------------------------show the status of the file-----------------------------------------------------
+
+# ----------------------------list the chunks of the file-----------------------------------------------------
+
+# ----------------------------delete the chunk-----------------------------------------------------
+
+# ----------------------------edit the status of the chunk-----------------------------------------------------
+
+# ----------------------------insert a new chunk-----------------------------------------------------
+
+# ----------------------------upload a file-----------------------------------------------------
+
+# ----------------------------get a specific chunk-----------------------------------------------------
+
+# ----------------------------retrieval test-----------------------------------------------------
+
```
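
The three routes above can also be exercised directly with `requests`, mirroring what the SDK methods further below do; a sketch with placeholder values (`base_url`, `api_key`, `dataset_id`, and `document_id` are all assumptions):

```python
import requests

base_url = "http://127.0.0.1:9380/api/v1"  # placeholder host address
api_key = "<your_api_key>"                 # placeholder key
headers = {"Authorization": api_key}       # raw key, matching the SDK's authorization_header
dataset_id = "<dataset_id>"                # placeholder IDs
document_id = "<document_id>"

# POST /dataset/<dataset_id>/documents/ -- multipart upload; the form field must be named "file".
with open("test_data/test.txt", "rb") as f:
    upload_res = requests.post(f"{base_url}/dataset/{dataset_id}/documents/",
                               files=[("file", f)], headers=headers)

# GET /dataset/<dataset_id>/documents/ -- listing with pagination, sorting, and keyword filtering.
list_res = requests.get(f"{base_url}/dataset/{dataset_id}/documents/",
                        params={"offset": 0, "count": -1, "order_by": "create_time",
                                "descend": True, "keywords": ""},
                        headers=headers)

# DELETE /dataset/<dataset_id>/documents/<document_id> -- remove one document.
delete_res = requests.delete(f"{base_url}/dataset/{dataset_id}/documents/{document_id}",
                             headers=headers)
```
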
api/apps/documents_api.py
DELETED
```diff
@@ -1,228 +0,0 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
-#
-
-import os
-import re
-import warnings
-
-from flask import request
-from flask_login import login_required, current_user
-
-from api.db import FileType, ParserType
-from api.db.services import duplicate_name
-from api.db.services.document_service import DocumentService
-from api.db.services.file2document_service import File2DocumentService
-from api.db.services.file_service import FileService
-from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.settings import RetCode
-from api.utils import get_uuid
-from api.utils.api_utils import construct_json_result
-from api.utils.file_utils import filename_type, thumbnail
-from rag.utils.minio_conn import MINIO
-from api.db.db_models import Task, File
-from api.db import FileType, TaskStatus, ParserType, FileSource
-
-
-MAXIMUM_OF_UPLOADING_FILES = 256
-
-
-# ----------------------------upload local files-----------------------------------------------------
-@manager.route('/<dataset_id>', methods=['POST'])
-@login_required
-def upload(dataset_id):
-    # no files
-    if not request.files:
-        return construct_json_result(
-            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
-
-    # the number of uploading files exceeds the limit
-    file_objs = request.files.getlist('file')
-    num_file_objs = len(file_objs)
-
-    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
-        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
-                                     f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
-
-    for file_obj in file_objs:
-        # the content of the file
-        file_content = file_obj.read()
-        file_name = file_obj.filename
-        # no name
-        if not file_name:
-            return construct_json_result(
-                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
-
-        # TODO: support the remote files
-        if 'http' in file_name:
-            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
-
-        # the content is empty, raising a warning
-        if file_content == b'':
-            warnings.warn(f"[WARNING]: The file {file_name} is empty.")
-
-    # no dataset
-    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
-    if not exist:
-        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
-
-    # get the root_folder
-    root_folder = FileService.get_root_folder(current_user.id)
-    # get the id of the root_folder
-    parent_file_id = root_folder["id"]  # document id
-    # this is for the new user, create '.knowledgebase' file
-    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-    # go inside this folder, get the kb_root_folder
-    kb_root_folder = FileService.get_kb_folder(current_user.id)
-    # link the file management to the kb_folder
-    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
-
-    # grab all the errs
-    err = []
-    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
-    uploaded_docs_json = []
-    for file in file_objs:
-        try:
-            # TODO: get this value from the database as some tenants have this limit while others don't
-            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
-                return construct_json_result(code=RetCode.DATA_ERROR,
-                                             message="Exceed the maximum file number of a free user!")
-            # deal with the duplicate name
-            filename = duplicate_name(
-                DocumentService.query,
-                name=file.filename,
-                kb_id=dataset.id)
-
-            # deal with the unsupported type
-            filetype = filename_type(filename)
-            if filetype == FileType.OTHER.value:
-                return construct_json_result(code=RetCode.DATA_ERROR,
-                                             message="This type of file has not been supported yet!")
-
-            # upload to the minio
-            location = filename
-            while MINIO.obj_exist(dataset_id, location):
-                location += "_"
-            blob = file.read()
-            MINIO.put(dataset_id, location, blob)
-            doc = {
-                "id": get_uuid(),
-                "kb_id": dataset.id,
-                "parser_id": dataset.parser_id,
-                "parser_config": dataset.parser_config,
-                "created_by": current_user.id,
-                "type": filetype,
-                "name": filename,
-                "location": location,
-                "size": len(blob),
-                "thumbnail": thumbnail(filename, blob)
-            }
-            if doc["type"] == FileType.VISUAL:
-                doc["parser_id"] = ParserType.PICTURE.value
-            if re.search(r"\.(ppt|pptx|pages)$", filename):
-                doc["parser_id"] = ParserType.PRESENTATION.value
-            DocumentService.insert(doc)
-
-            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
-            uploaded_docs_json.append(doc)
-        except Exception as e:
-            err.append(file.filename + ": " + str(e))
-
-    if err:
-        # return all the errors
-        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
-    # success
-    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
-
-# ----------------------------delete a file-----------------------------------------------------
-@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
-@login_required
-def delete(document_id, dataset_id):  # string
-    # get the root folder
-    root_folder = FileService.get_root_folder(current_user.id)
-    # parent file's id
-    parent_file_id = root_folder["id"]
-    # consider the new user
-    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-    # store all the errors that may have
-    errors = ""
-    try:
-        # whether there is this document
-        exist, doc = DocumentService.get_by_id(document_id)
-        if not exist:
-            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
-        # whether this doc is authorized by this tenant
-        tenant_id = DocumentService.get_tenant_id(document_id)
-        if not tenant_id:
-            return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
-                                                 f" reason!", code=RetCode.AUTHENTICATION_ERROR)
-
-        # get the doc's id and location
-        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)
-
-        if real_dataset_id != dataset_id:
-            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
-                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
-
-        # there is an issue when removing
-        if not DocumentService.remove_document(doc, tenant_id):
-            return construct_json_result(
-                message="There was an error during the document removal process. Please check the status of the "
-                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
-
-        # fetch the File2Document record associated with the provided document ID.
-        file_to_doc = File2DocumentService.get_by_document_id(document_id)
-        # delete the associated File record.
-        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
-        # delete the File2Document record itself using the document ID. This removes the
-        # association between the document and the file after the File record has been deleted.
-        File2DocumentService.delete_by_document_id(document_id)
-
-        # delete it from minio
-        MINIO.rm(dataset_id, location)
-    except Exception as e:
-        errors += str(e)
-    if errors:
-        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
-
-    return construct_json_result(data=True, code=RetCode.SUCCESS)
-
-# ----------------------------upload online files------------------------------------------------
-
-# ----------------------------download a file-----------------------------------------------------
-
-# ----------------------------enable rename-----------------------------------------------------
-
-# ----------------------------list files-----------------------------------------------------
-
-# ----------------------------start parsing-----------------------------------------------------
-
-# ----------------------------stop parsing-----------------------------------------------------
-
-# ----------------------------show the status of the file-----------------------------------------------------
-
-# ----------------------------list the chunks of the file-----------------------------------------------------
-
-# ----------------------------delete the chunk-----------------------------------------------------
-
-# ----------------------------edit the status of the chunk-----------------------------------------------------
-
-# ----------------------------insert a new chunk-----------------------------------------------------
-
-# ----------------------------upload a file-----------------------------------------------------
-
-# ----------------------------get a specific chunk-----------------------------------------------------
-
-# ----------------------------retrieval test-----------------------------------------------------
```
api/db/services/document_service.py
CHANGED
```diff
@@ -59,6 +59,35 @@ class DocumentService(CommonService):
 
         return list(docs.dicts()), count
 
+    @classmethod
+    @DB.connection_context()
+    def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords):
+        if keywords:
+            docs = cls.model.select().where(
+                (cls.model.kb_id == dataset_id),
+                (fn.LOWER(cls.model.name).contains(keywords.lower()))
+            )
+        else:
+            docs = cls.model.select().where(cls.model.kb_id == dataset_id)
+
+        total = docs.count()
+
+        if descend == 'True':
+            docs = docs.order_by(cls.model.getter_by(order_by).desc())
+        if descend == 'False':
+            docs = docs.order_by(cls.model.getter_by(order_by).asc())
+
+        docs = list(docs.dicts())
+        docs_length = len(docs)
+
+        if offset < 0 or offset > docs_length:
+            raise IndexError("Offset is out of the valid range.")
+
+        if count == -1:
+            return docs[offset:], total
+
+        return docs[offset:offset + count], total
+
     @classmethod
     @DB.connection_context()
     def insert(cls, doc):
```
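
`list_documents_in_dataset` computes `total` before sorting and slicing, so pagination never changes the reported total, and `count == -1` means "everything from `offset` onward". The slice logic in isolation (a plain-Python sketch, no database; the sample data is made up):

```python
docs = [f"doc_{i}" for i in range(20)]  # stand-in for the rows fetched for one dataset
total = len(docs)                       # counted before slicing, as in the service
offset, count = 2, 10

if offset < 0 or offset > len(docs):
    raise IndexError("Offset is out of the valid range.")

page = docs[offset:] if count == -1 else docs[offset:offset + count]
print(total, page)  # 20, ['doc_2', ..., 'doc_11'] -- total is unaffected by the page size
```

Note also that `descend` is compared against the strings `'True'`/`'False'` rather than booleans: the value arrives through the query string, so it is text by the time it reaches the service.
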
api/db/services/knowledgebase_service.py
CHANGED
```diff
@@ -60,6 +60,9 @@ class KnowledgebaseService(CommonService):
         if offset < 0 or offset > kbs_length:
             raise IndexError("Offset is out of the valid range.")
 
+        if count == -1:
+            return kbs[offset:]
+
         return kbs[offset:offset+count]
 
     @classmethod
```
docs/references/ragflow_api.md
CHANGED
````diff
@@ -274,4 +274,6 @@ You are required to input at least one parameter.
     "code": 102,
     "message": "Please input at least one parameter that you want to update!"
 }
-```
+```
+
+
````
sdk/python/ragflow/ragflow.py
CHANGED
```diff
@@ -26,12 +26,11 @@ class RAGFlow:
         '''
         api_url: http://<host_address>/api/v1
         dataset_url: http://<host_address>/api/v1/dataset
-        document_url: http://<host_address>/api/v1/documents
+        document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
         '''
         self.user_key = user_key
         self.api_url = f"{base_url}/api/{version}"
         self.dataset_url = f"{self.api_url}/dataset"
-        self.document_url = f"{self.api_url}/documents"
         self.authorization_header = {"Authorization": "{}".format(self.user_key)}
 
     def create_dataset(self, dataset_name):
@@ -79,7 +78,7 @@ class RAGFlow:
         response = requests.put(endpoint, json=params, headers=self.authorization_header)
         return response.json()
 
-    # -------------------- content management -----------------------------------------------------
+    # -------------------- content management -----------------------------------------------------
 
     # ----------------------------upload local files-----------------------------------------------------
     def upload_local_file(self, dataset_id, file_paths):
@@ -95,7 +94,7 @@ class RAGFlow:
         else:
             return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"}
 
-        res = requests.request('POST', url=f"{self.
+        res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
                                headers=self.authorization_header)
 
         result_dict = json.loads(res.text)
@@ -103,16 +102,27 @@ class RAGFlow:
 
     # ----------------------------delete a file-----------------------------------------------------
     def delete_files(self, document_id, dataset_id):
-        endpoint = f"{self.
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
         res = requests.delete(endpoint, headers=self.authorization_header)
         return res.json()
 
+    # ----------------------------list files-----------------------------------------------------
+    def list_files(self, dataset_id, offset=0, count=-1, order_by="create_time", descend=True, keywords=""):
+        params = {
+            "offset": offset,
+            "count": count,
+            "order_by": order_by,
+            "descend": descend,
+            "keywords": keywords
+        }
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/"
+        res = requests.get(endpoint, params=params, headers=self.authorization_header)
+        return res.json()
+
     # ----------------------------download a file-----------------------------------------------------
 
     # ----------------------------enable rename-----------------------------------------------------
 
-    # ----------------------------list files-----------------------------------------------------
-
     # ----------------------------start parsing-----------------------------------------------------
 
     # ----------------------------stop parsing-----------------------------------------------------
```
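
One detail that ties `list_files` to the service layer: `requests` serializes the boolean `descend=True` into the query string as the literal text `True`, which is exactly what `list_documents_in_dataset` compares against. A minimal illustration (no server involved; the URL is a placeholder):

```python
import requests

# Prepare the request without sending it, just to inspect the encoded query string.
prepared = requests.Request(
    "GET", "http://example.invalid/documents/",
    params={"descend": True, "offset": 0, "count": -1},
).prepare()
print(prepared.url)  # http://example.invalid/documents/?descend=True&offset=0&count=-1
```
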
sdk/python/test/test_document.py
CHANGED
```diff
@@ -37,7 +37,7 @@ class TestFile(TestSdk):
         dataset_id = created_res['data']['dataset_id']
         file_paths = ["test_data/test.txt", "test_data/test1.txt"]
         res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res['code'] == RetCode.SUCCESS and res['
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
 
     def test_upload_one_file(self):
         """
@@ -48,7 +48,7 @@ class TestFile(TestSdk):
         dataset_id = created_res['data']['dataset_id']
         file_paths = ["test_data/test.txt"]
         res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res['code'] == RetCode.SUCCESS and res['
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
 
     def test_upload_nonexistent_files(self):
         """
@@ -237,12 +237,143 @@ class TestFile(TestSdk):
         assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
                 f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')
 
+    # ----------------------------list files-----------------------------------------------------
+    def test_list_documents_with_success(self):
+        """
+        Test listing documents with a successful outcome.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        # upload a document
+        created_res = ragflow.create_dataset("test_list_documents_with_success")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
+
+    def test_list_documents_with_checking_size(self):
+        """
+        Test listing documents and verify the size and names of the documents.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        # upload 10 documents
+        created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt"] * 10
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
+
+    def test_list_documents_with_getting_empty_result(self):
+        """
+        Test listing documents that should be empty.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        # upload 0 documents
+        created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
+        created_res_id = created_res['data']['dataset_id']
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 0
+
+    def test_list_documents_with_creating_100_documents(self):
+        """
+        Test listing 100 documents and verify the size of these documents.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        # upload 100 documents
+        created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt"] * 100
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 100
+
+    def test_list_document_with_failure(self):
+        """
+        Test listing documents with IndexError.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_list_document_with_failure")
+        created_res_id = created_res['data']['dataset_id']
+        response = ragflow.list_files(created_res_id, offset=-1, count=-1)
+        assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
+
+    def test_list_document_with_verifying_offset_and_count(self):
+        """
+        Test listing documents with verifying the functionalities of offset and count.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id, offset=2, count=10)
+
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
+
+    def test_list_document_with_verifying_keywords(self):
+        """
+        Test listing documents with verifying the functionality of searching keywords.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id, keywords="empty")
+
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
+
+    def test_list_document_with_verifying_order_by_and_descend(self):
+        """
+        Test listing documents with verifying the functionality of order_by and descend.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 2
+        docs = response['data']['docs']
+        # reverse
+        i = 1
+        for doc in docs:
+            assert doc['name'] in file_paths[i]
+            i -= 1
+
+    def test_list_document_with_verifying_order_by_and_ascend(self):
+        """
+        Test listing documents with verifying the functionality of order_by and ascend.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
+        created_res_id = created_res['data']['dataset_id']
+        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        # Call the list_document method
+        response = ragflow.list_files(created_res_id, descend=False)
+        assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 3
+
+        docs = response['data']['docs']
+
+        i = 0
+        for doc in docs:
+            assert doc['name'] in file_paths[i]
+            i += 1
+
+    # TODO: have to set the limitation of the number of documents
     # ----------------------------download a file-----------------------------------------------------
 
     # ----------------------------enable rename-----------------------------------------------------
 
-    # ----------------------------list files-----------------------------------------------------
-
     # ----------------------------start parsing-----------------------------------------------------
 
     # ----------------------------stop parsing-----------------------------------------------------
@@ -257,8 +388,6 @@ class TestFile(TestSdk):
 
     # ----------------------------insert a new chunk-----------------------------------------------------
 
-    # ----------------------------upload a file-----------------------------------------------------
-
     # ----------------------------get a specific chunk-----------------------------------------------------
 
     # ----------------------------retrieval test-----------------------------------------------------
```