KevinHuSh
committed on
Commit
·
abeee5e
1
Parent(s):
9b6b3f7
refine document upload (#602)
Browse files
### What problem does this PR solve?
#567
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- api/apps/document_app.py +52 -48
- docker/entrypoint.sh +2 -1
api/apps/document_app.py
CHANGED
|
@@ -51,55 +51,59 @@ def upload():
|
|
| 51 |
if 'file' not in request.files:
|
| 52 |
return get_json_result(
|
| 53 |
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
| 54 |
-
file = request.files['file']
|
| 55 |
-
if file.filename == '':
|
| 56 |
-
return get_json_result(
|
| 57 |
-
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
if
|
| 62 |
-
return
|
| 63 |
-
retmsg=
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
@manager.route('/create', methods=['POST'])
|
|
|
|
| 51 |
if 'file' not in request.files:
|
| 52 |
return get_json_result(
|
| 53 |
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
file_objs = request.files.getlist('file')
|
| 56 |
+
for file_obj in file_objs:
|
| 57 |
+
if file_obj.filename == '':
|
| 58 |
+
return get_json_result(
|
| 59 |
+
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 60 |
+
|
| 61 |
+
err = []
|
| 62 |
+
for file in file_objs:
|
| 63 |
+
try:
|
| 64 |
+
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
| 65 |
+
if not e:
|
| 66 |
+
raise LookupError("Can't find this knowledgebase!")
|
| 67 |
+
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
|
| 68 |
+
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
| 69 |
+
raise RuntimeError("Exceed the maximum file number of a free user!")
|
| 70 |
+
|
| 71 |
+
filename = duplicate_name(
|
| 72 |
+
DocumentService.query,
|
| 73 |
+
name=file.filename,
|
| 74 |
+
kb_id=kb.id)
|
| 75 |
+
filetype = filename_type(filename)
|
| 76 |
+
if filetype == FileType.OTHER.value:
|
| 77 |
+
raise RuntimeError("This type of file has not been supported yet!")
|
| 78 |
+
|
| 79 |
+
location = filename
|
| 80 |
+
while MINIO.obj_exist(kb_id, location):
|
| 81 |
+
location += "_"
|
| 82 |
+
blob = file.read()
|
| 83 |
+
MINIO.put(kb_id, location, blob)
|
| 84 |
+
doc = {
|
| 85 |
+
"id": get_uuid(),
|
| 86 |
+
"kb_id": kb.id,
|
| 87 |
+
"parser_id": kb.parser_id,
|
| 88 |
+
"parser_config": kb.parser_config,
|
| 89 |
+
"created_by": current_user.id,
|
| 90 |
+
"type": filetype,
|
| 91 |
+
"name": filename,
|
| 92 |
+
"location": location,
|
| 93 |
+
"size": len(blob),
|
| 94 |
+
"thumbnail": thumbnail(filename, blob)
|
| 95 |
+
}
|
| 96 |
+
if doc["type"] == FileType.VISUAL:
|
| 97 |
+
doc["parser_id"] = ParserType.PICTURE.value
|
| 98 |
+
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 99 |
+
doc["parser_id"] = ParserType.PRESENTATION.value
|
| 100 |
+
DocumentService.insert(doc)
|
| 101 |
+
except Exception as e:
|
| 102 |
+
err.append(file.filename + ": " + str(e))
|
| 103 |
+
if err:
|
| 104 |
+
return get_json_result(
|
| 105 |
+
data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
| 106 |
+
return get_json_result(data=True)
|
| 107 |
|
| 108 |
|
| 109 |
@manager.route('/create', methods=['POST'])
|
docker/entrypoint.sh
CHANGED
|
@@ -34,6 +34,7 @@ do
|
|
| 34 |
task_exe $i $WS &
|
| 35 |
done
|
| 36 |
|
|
|
|
| 37 |
$PY api/ragflow_server.py
|
| 38 |
-
|
| 39 |
wait;
|
|
|
|
| 34 |
task_exe $i $WS &
|
| 35 |
done
|
| 36 |
|
| 37 |
+
while [ 1 -eq 1 ];do
|
| 38 |
$PY api/ragflow_server.py
|
| 39 |
+
done
|
| 40 |
wait;
|