cecilia-uu
committed on
Commit
·
12defec
1
Parent(s):
67bae62
API: completed delete_doc api (#1290)
Browse files
### What problem does this PR solve?
Adds an API (and matching SDK method and tests) for deleting a document from a dataset.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/documents_api.py +58 -2
- sdk/python/ragflow/ragflow.py +6 -3
- sdk/python/test/test_document.py +87 -3
api/apps/documents_api.py
CHANGED
@@ -24,6 +24,7 @@ from flask_login import login_required, current_user
|
|
24 |
from api.db import FileType, ParserType
|
25 |
from api.db.services import duplicate_name
|
26 |
from api.db.services.document_service import DocumentService
|
|
|
27 |
from api.db.services.file_service import FileService
|
28 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
29 |
from api.settings import RetCode
|
@@ -31,6 +32,8 @@ from api.utils import get_uuid
|
|
31 |
from api.utils.api_utils import construct_json_result
|
32 |
from api.utils.file_utils import filename_type, thumbnail
|
33 |
from rag.utils.minio_conn import MINIO
|
|
|
|
|
34 |
|
35 |
|
36 |
MAXIMUM_OF_UPLOADING_FILES = 256
|
@@ -89,6 +92,7 @@ def upload(dataset_id):
|
|
89 |
# grab all the errs
|
90 |
err = []
|
91 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
|
|
92 |
for file in file_objs:
|
93 |
try:
|
94 |
# TODO: get this value from the database as some tenants have this limit while others don't
|
@@ -132,6 +136,7 @@ def upload(dataset_id):
|
|
132 |
DocumentService.insert(doc)
|
133 |
|
134 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
|
|
135 |
except Exception as e:
|
136 |
err.append(file.filename + ": " + str(e))
|
137 |
|
@@ -139,14 +144,65 @@ def upload(dataset_id):
|
|
139 |
# return all the errors
|
140 |
return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
|
141 |
# success
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
return construct_json_result(data=True, code=RetCode.SUCCESS)
|
143 |
|
144 |
# ----------------------------upload online files------------------------------------------------
|
145 |
|
146 |
# ----------------------------download a file-----------------------------------------------------
|
147 |
|
148 |
-
# ----------------------------delete a file-----------------------------------------------------
|
149 |
-
|
150 |
# ----------------------------enable rename-----------------------------------------------------
|
151 |
|
152 |
# ----------------------------list files-----------------------------------------------------
|
|
|
24 |
from api.db import FileType, ParserType
|
25 |
from api.db.services import duplicate_name
|
26 |
from api.db.services.document_service import DocumentService
|
27 |
+
from api.db.services.file2document_service import File2DocumentService
|
28 |
from api.db.services.file_service import FileService
|
29 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
30 |
from api.settings import RetCode
|
|
|
32 |
from api.utils.api_utils import construct_json_result
|
33 |
from api.utils.file_utils import filename_type, thumbnail
|
34 |
from rag.utils.minio_conn import MINIO
|
35 |
+
from api.db.db_models import Task, File
|
36 |
+
from api.db import FileType, TaskStatus, ParserType, FileSource
|
37 |
|
38 |
|
39 |
MAXIMUM_OF_UPLOADING_FILES = 256
|
|
|
92 |
# grab all the errs
|
93 |
err = []
|
94 |
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
95 |
+
uploaded_docs_json = []
|
96 |
for file in file_objs:
|
97 |
try:
|
98 |
# TODO: get this value from the database as some tenants have this limit while others don't
|
|
|
136 |
DocumentService.insert(doc)
|
137 |
|
138 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
139 |
+
uploaded_docs_json.append(doc)
|
140 |
except Exception as e:
|
141 |
err.append(file.filename + ": " + str(e))
|
142 |
|
|
|
144 |
# return all the errors
|
145 |
return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
|
146 |
# success
|
147 |
+
return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
|
148 |
+
|
149 |
+
# ----------------------------delete a file-----------------------------------------------------
|
150 |
+
@manager.route('/<dataset_id>/<document_id>', methods=['DELETE'])
@login_required
def delete(document_id, dataset_id):  # string
    """Delete one document from a dataset.

    The document is looked up, checked against the dataset named in the URL,
    handed to ``DocumentService.remove_document``, and then its linked
    knowledge-base ``File`` row, its ``File2Document`` row and its MinIO
    object are removed.

    Args:
        document_id: ID of the document to delete (URL path segment).
        dataset_id: ID of the dataset the caller says the document is in
            (URL path segment).

    Returns:
        The result of ``construct_json_result``:
        ``data=True`` / ``RetCode.SUCCESS`` when every step finished;
        ``RetCode.DATA_ERROR`` when the document is not found;
        ``RetCode.AUTHENTICATION_ERROR`` when no tenant id is found for it;
        ``RetCode.ARGUMENT_ERROR`` when it lives in a different dataset;
        ``RetCode.OPERATING_ERROR`` when ``remove_document`` reports failure;
        ``data=False`` / ``RetCode.SERVER_ERROR`` when any step raises.
    """
    # get the root folder
    root_folder = FileService.get_root_folder(current_user.id)
    # parent file's id
    parent_file_id = root_folder["id"]
    # consider the new user
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)

    try:
        # whether there is this document
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)

        # whether this doc is authorized by this tenant
        # NOTE(review): this only checks that a tenant id can be looked up for
        # the document; it is never compared with current_user. Confirm that
        # this is the intended authorization check.
        tenant_id = DocumentService.get_tenant_id(document_id)
        if not tenant_id:
            return construct_json_result(message=f"You cannot delete this document {document_id} due to the authorization"
                                                 f" reason!", code=RetCode.AUTHENTICATION_ERROR)

        # get the doc's dataset id and storage location
        real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id)

        if real_dataset_id != dataset_id:
            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)

        # there is an issue when removing
        if not DocumentService.remove_document(doc, tenant_id):
            return construct_json_result(
                message="There was an error during the document removal process. Please check the status of the "
                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)

        # fetch the File2Document record associated with the provided document ID.
        file_to_doc = File2DocumentService.get_by_document_id(document_id)
        try:
            linked_file_id = file_to_doc[0].file_id
        except IndexError:
            # No association row: there is no File record to delete, but the
            # remaining cleanup (association + stored object) must still run,
            # because the document itself has already been removed above.
            linked_file_id = None

        if linked_file_id is not None:
            # delete the associated File record.
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == linked_file_id])

        # delete the File2Document record itself using the document ID. This removes the
        # association between the document and the file after the File record has been deleted.
        File2DocumentService.delete_by_document_id(document_id)

        # delete it from minio (dataset_id equals real_dataset_id at this point)
        MINIO.rm(dataset_id, location)
    except Exception as e:
        # Report every failure. Falling back to repr() keeps the message
        # non-empty for exceptions raised without text (e.g. a bare assert),
        # which previously slipped through and were reported as success.
        return construct_json_result(data=False, message=str(e) or repr(e), code=RetCode.SERVER_ERROR)

    return construct_json_result(data=True, code=RetCode.SUCCESS)
|
201 |
|
202 |
# ----------------------------upload online files------------------------------------------------
|
203 |
|
204 |
# ----------------------------download a file-----------------------------------------------------
|
205 |
|
|
|
|
|
206 |
# ----------------------------enable rename-----------------------------------------------------
|
207 |
|
208 |
# ----------------------------list files-----------------------------------------------------
|
sdk/python/ragflow/ragflow.py
CHANGED
@@ -101,10 +101,13 @@ class RAGFlow:
|
|
101 |
result_dict = json.loads(res.text)
|
102 |
return result_dict
|
103 |
|
104 |
-
# ----------------------------upload remote files-----------------------------------------------------
|
105 |
-
# ----------------------------download a file-----------------------------------------------------
|
106 |
-
|
107 |
# ----------------------------delete a file-----------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
# ----------------------------enable rename-----------------------------------------------------
|
110 |
|
|
|
101 |
result_dict = json.loads(res.text)
|
102 |
return result_dict
|
103 |
|
|
|
|
|
|
|
104 |
# ----------------------------delete a file-----------------------------------------------------
|
105 |
+
def delete_files(self, document_id, dataset_id):
    """
    Send a DELETE request for one document of a dataset.

    The URL is built as ``<document_url>/<dataset_id>/<document_id>`` and the
    request carries ``self.authorization_header``. The decoded JSON body of
    the response is returned unchanged.
    """
    url = "{}/{}/{}".format(self.document_url, dataset_id, document_id)
    response = requests.delete(url, headers=self.authorization_header)
    payload = response.json()
    return payload
|
109 |
+
|
110 |
+
# ----------------------------download a file-----------------------------------------------------
|
111 |
|
112 |
# ----------------------------enable rename-----------------------------------------------------
|
113 |
|
sdk/python/test/test_document.py
CHANGED
@@ -149,11 +149,95 @@ class TestFile(TestSdk):
|
|
149 |
res = ragflow.upload_local_file(dataset_id, file_paths)
|
150 |
assert res['code'] == RetCode.ARGUMENT_ERROR and res['message'] == 'Remote files have not unsupported.'
|
151 |
|
152 |
-
# ----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
# ----------------------------enable rename-----------------------------------------------------
|
159 |
|
|
|
149 |
res = ragflow.upload_local_file(dataset_id, file_paths)
|
150 |
assert res['code'] == RetCode.ARGUMENT_ERROR and res['message'] == 'Remote files have not unsupported.'
|
151 |
|
152 |
+
# ----------------------------delete a file-----------------------------------------------------
|
153 |
+
def test_delete_one_file(self):
    """
    Upload a single local file, delete it, and expect a successful response.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    dataset_id = client.create_dataset("test_delete_one_file")['data']['dataset_id']
    upload_res = client.upload_local_file(dataset_id, ["test_data/test.txt"])
    # the upload response lists the uploaded documents; use the first one's id
    doc_id = upload_res['data'][0]['id']
    deleted_res = client.delete_files(doc_id, dataset_id)
    assert deleted_res['code'] == RetCode.SUCCESS
    assert deleted_res['data'] is True
|
169 |
|
170 |
+
def test_delete_document_with_not_existing_document(self):
    """
    Deleting an unknown document id must fail with DATA_ERROR and a
    "not found" message.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)
    dataset_id = client.create_dataset("test_delete_document_with_not_existing_document")['data']['dataset_id']
    # "111" is an id that no upload in this test has produced
    missing_doc_id = "111"
    res = client.delete_files(missing_doc_id, dataset_id)
    assert res['code'] == RetCode.DATA_ERROR
    assert res['message'] == 'Document 111 not found!'
|
179 |
|
180 |
+
def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
    """
    Upload the same local file 100 times, then delete every uploaded
    document and expect each deletion to succeed.
    """
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    # Use a dataset name of its own instead of reusing "test_delete_one_file",
    # so this test does not share a dataset name with another test.
    created_res = ragflow.create_dataset("test_delete_100_documents")
    dataset_id = created_res['data']['dataset_id']

    # upload 100 docs
    file_paths = ["test_data/test.txt"] * 100
    res = ragflow.upload_local_file(dataset_id, file_paths)

    # delete every uploaded document, checking each response
    for uploaded in res['data']:
        deleted_res = ragflow.delete_files(uploaded['id'], dataset_id)
        assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
|
199 |
+
|
200 |
+
def test_delete_document_from_nonexistent_dataset(self):
    """
    Deleting a document while naming a dataset id that was never created
    ("000") must fail with ARGUMENT_ERROR and report the real dataset.
    """
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    # Use a dataset name of its own instead of reusing "test_delete_one_file",
    # so this test does not share a dataset name with another test.
    created_res = ragflow.create_dataset("test_delete_document_from_nonexistent_dataset")
    dataset_id = created_res['data']['dataset_id']
    file_paths = ["test_data/test.txt"]
    res = ragflow.upload_local_file(dataset_id, file_paths)

    # get the doc_id
    doc_id = res['data'][0]['id']

    # try to delete it through a dataset id that does not exist
    deleted_res = ragflow.delete_files(doc_id, "000")

    expected_message = f'The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.'
    assert deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] == expected_message
|
217 |
+
|
218 |
+
def test_delete_document_which_is_located_in_other_dataset(self):
    """
    A document uploaded to one dataset must not be deletable through a
    second dataset; the API answers with ARGUMENT_ERROR and names both.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)

    # dataset that actually holds the document
    owner_dataset_id = client.create_dataset(
        "test_delete_document_which_is_located_in_other_dataset")['data']['dataset_id']
    upload_res = client.upload_local_file(owner_dataset_id, ["test_data/test.txt"])

    # a second, unrelated dataset
    other_dataset_id = client.create_dataset("other_dataset")['data']['dataset_id']

    doc_id = upload_res['data'][0]['id']

    # ask for the deletion through the wrong dataset
    deleted_res = client.delete_files(doc_id, other_dataset_id)

    expected_message = (f'The document {doc_id} is not in the dataset: {other_dataset_id}, '
                        f'but in the dataset: {owner_dataset_id}.')
    assert deleted_res['code'] == RetCode.ARGUMENT_ERROR
    assert deleted_res['message'] == expected_message
|
239 |
+
|
240 |
+
# ----------------------------download a file-----------------------------------------------------
|
241 |
|
242 |
# ----------------------------enable rename-----------------------------------------------------
|
243 |
|