GYH
		
	committed on
		
		
					Commit 
							
							·
						
						83bd6a2
	
1
								Parent(s):
							
							e346b5a
								
Updated document upload method (#777)
Browse files
### What problem does this PR solve?
api_app.py
/document/upload 
Adds two optional (non-mandatory) parameters:
parser_id:
[naive,qa,resume,manual,table,paper,book,laws,presentation,picture,one]
run: 1
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/api_app.py +38 -5
    	
        api/apps/api_app.py
    CHANGED
    
    | @@ -31,11 +31,11 @@ from api.settings import RetCode | |
| 31 | 
             
            from api.utils import get_uuid, current_timestamp, datetime_format
         | 
| 32 | 
             
            from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
         | 
| 33 | 
             
            from itsdangerous import URLSafeTimedSerializer
         | 
| 34 | 
            -
             | 
| 35 | 
             
            from api.utils.file_utils import filename_type, thumbnail
         | 
| 36 | 
             
            from rag.utils.minio_conn import MINIO
         | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
             
            def generate_confirmation_token(tenent_id):
         | 
| 40 | 
             
                serializer = URLSafeTimedSerializer(tenent_id)
         | 
| 41 | 
             
                return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
         | 
| @@ -229,6 +229,7 @@ def upload(): | |
| 229 | 
             
                    return get_json_result(
         | 
| 230 | 
             
                        data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
         | 
| 231 |  | 
|  | |
| 232 | 
             
                file = request.files['file']
         | 
| 233 | 
             
                if file.filename == '':
         | 
| 234 | 
             
                    return get_json_result(
         | 
| @@ -252,6 +253,7 @@ def upload(): | |
| 252 | 
             
                        location += "_"
         | 
| 253 | 
             
                    blob = request.files['file'].read()
         | 
| 254 | 
             
                    MINIO.put(kb_id, location, blob)
         | 
|  | |
| 255 | 
             
                    doc = {
         | 
| 256 | 
             
                        "id": get_uuid(),
         | 
| 257 | 
             
                        "kb_id": kb.id,
         | 
| @@ -264,11 +266,42 @@ def upload(): | |
| 264 | 
             
                        "size": len(blob),
         | 
| 265 | 
             
                        "thumbnail": thumbnail(filename, blob)
         | 
| 266 | 
             
                    }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 267 | 
             
                    if doc["type"] == FileType.VISUAL:
         | 
| 268 | 
             
                        doc["parser_id"] = ParserType.PICTURE.value
         | 
| 269 | 
             
                    if re.search(r"\.(ppt|pptx|pages)$", filename):
         | 
| 270 | 
             
                        doc["parser_id"] = ParserType.PRESENTATION.value
         | 
| 271 | 
            -
             | 
| 272 | 
            -
                     | 
|  | |
| 273 | 
             
                except Exception as e:
         | 
| 274 | 
             
                    return server_error_response(e)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 31 | 
             
            from api.utils import get_uuid, current_timestamp, datetime_format
         | 
| 32 | 
             
            from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
         | 
| 33 | 
             
            from itsdangerous import URLSafeTimedSerializer
         | 
| 34 | 
            +
            from api.db.services.task_service import TaskService, queue_tasks
         | 
| 35 | 
             
            from api.utils.file_utils import filename_type, thumbnail
         | 
| 36 | 
             
            from rag.utils.minio_conn import MINIO
         | 
| 37 | 
            +
            from api.db.db_models import Task
         | 
| 38 | 
            +
            from api.db.services.file2document_service import File2DocumentService
         | 
| 39 | 
             
            def generate_confirmation_token(tenent_id):
         | 
| 40 | 
             
                serializer = URLSafeTimedSerializer(tenent_id)
         | 
| 41 | 
             
                return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
         | 
|  | |
| 229 | 
             
                    return get_json_result(
         | 
| 230 | 
             
                        data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
         | 
| 231 |  | 
| 232 | 
            +
             | 
| 233 | 
             
                file = request.files['file']
         | 
| 234 | 
             
                if file.filename == '':
         | 
| 235 | 
             
                    return get_json_result(
         | 
|  | |
| 253 | 
             
                        location += "_"
         | 
| 254 | 
             
                    blob = request.files['file'].read()
         | 
| 255 | 
             
                    MINIO.put(kb_id, location, blob)
         | 
| 256 | 
            +
             | 
| 257 | 
             
                    doc = {
         | 
| 258 | 
             
                        "id": get_uuid(),
         | 
| 259 | 
             
                        "kb_id": kb.id,
         | 
|  | |
| 266 | 
             
                        "size": len(blob),
         | 
| 267 | 
             
                        "thumbnail": thumbnail(filename, blob)
         | 
| 268 | 
             
                    }
         | 
| 269 | 
            +
             | 
| 270 | 
            +
                    form_data=request.form
         | 
| 271 | 
            +
                    if "parser_id" in form_data.keys():
         | 
| 272 | 
            +
                        if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
         | 
| 273 | 
            +
                            doc["parser_id"] = request.form.get("parser_id").strip()
         | 
| 274 | 
             
                    if doc["type"] == FileType.VISUAL:
         | 
| 275 | 
             
                        doc["parser_id"] = ParserType.PICTURE.value
         | 
| 276 | 
             
                    if re.search(r"\.(ppt|pptx|pages)$", filename):
         | 
| 277 | 
             
                        doc["parser_id"] = ParserType.PRESENTATION.value
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                    doc_result = DocumentService.insert(doc)
         | 
| 280 | 
            +
             | 
| 281 | 
             
                except Exception as e:
         | 
| 282 | 
             
                    return server_error_response(e)
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                if "run" in form_data.keys():
         | 
| 285 | 
            +
                    if request.form.get("run").strip() == "1":
         | 
| 286 | 
            +
                        try:
         | 
| 287 | 
            +
                            info = {"run": 1, "progress": 0}
         | 
| 288 | 
            +
                            info["progress_msg"] = ""
         | 
| 289 | 
            +
                            info["chunk_num"] = 0
         | 
| 290 | 
            +
                            info["token_num"] = 0
         | 
| 291 | 
            +
                            DocumentService.update_by_id(doc["id"], info)
         | 
| 292 | 
            +
                            # if str(req["run"]) == TaskStatus.CANCEL.value:
         | 
| 293 | 
            +
                            tenant_id = DocumentService.get_tenant_id(doc["id"])
         | 
| 294 | 
            +
                            if not tenant_id:
         | 
| 295 | 
            +
                                return get_data_error_result(retmsg="Tenant not found!")
         | 
| 296 | 
            +
             | 
| 297 | 
            +
                            #e, doc = DocumentService.get_by_id(doc["id"])
         | 
| 298 | 
            +
                            TaskService.filter_delete([Task.doc_id == doc["id"]])
         | 
| 299 | 
            +
                            e, doc = DocumentService.get_by_id(doc["id"])
         | 
| 300 | 
            +
                            doc = doc.to_dict()
         | 
| 301 | 
            +
                            doc["tenant_id"] = tenant_id
         | 
| 302 | 
            +
                            bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"])
         | 
| 303 | 
            +
                            queue_tasks(doc, bucket, name)
         | 
| 304 | 
            +
                        except Exception as e:
         | 
| 305 | 
            +
                             return server_error_response(e)
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                return get_json_result(data=doc_result.to_json())
         |