Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -5,12 +5,12 @@ from base64 import b64encode
|
|
5 |
from speech_recognition import AudioFile, Recognizer
|
6 |
import numpy as np
|
7 |
from scipy.spatial import distance as dist
|
8 |
-
from typing import Union
|
9 |
|
10 |
-
from fastapi import FastAPI, File, UploadFile
|
11 |
from fastapi.responses import StreamingResponse
|
12 |
|
13 |
-
from utils import tts, read_image_file, pil_to_base64, get_hist
|
14 |
from typing import Optional
|
15 |
from huggingface_hub import hf_hub_download
|
16 |
|
@@ -24,7 +24,7 @@ model = YOLO(model_path)
|
|
24 |
CLASS = model.model.names
|
25 |
defaul_bot_voice = "γγ―γγγγγγγΎγ"
|
26 |
area_thres = 0.3
|
27 |
-
|
28 |
app = FastAPI()
|
29 |
|
30 |
@app.get("/")
|
@@ -35,7 +35,8 @@ def read_root():
|
|
35 |
@app.post("/aisatsu_api/")
|
36 |
async def predict_api(
|
37 |
file: UploadFile = File(...),
|
38 |
-
last_seen: Union[UploadFile, None] = File(None)
|
|
|
39 |
):
|
40 |
total_time = time.time()
|
41 |
start_time = time.time()
|
@@ -62,7 +63,10 @@ async def predict_api(
|
|
62 |
print("Get face time", time.time() - start_time)
|
63 |
start_time = time.time()
|
64 |
if last_seen is not None:
|
65 |
-
|
|
|
|
|
|
|
66 |
if out_img is not None:
|
67 |
diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
|
68 |
print("Hist time", time.time() - start_time)
|
@@ -72,17 +76,24 @@ async def predict_api(
|
|
72 |
voice_bot_path = tts(defaul_bot_voice, language="ja")
|
73 |
image_bot_path = pil_to_base64(out_img)
|
74 |
print("Voice time", time.time() - start_time)
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
else:
|
88 |
return {"message": "No face detected"}
|
|
|
5 |
from speech_recognition import AudioFile, Recognizer
|
6 |
import numpy as np
|
7 |
from scipy.spatial import distance as dist
|
8 |
+
from typing import Union, Optional
|
9 |
|
10 |
+
from fastapi import FastAPI, File, UploadFile, Form
|
11 |
from fastapi.responses import StreamingResponse
|
12 |
|
13 |
+
from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
|
14 |
from typing import Optional
|
15 |
from huggingface_hub import hf_hub_download
|
16 |
|
|
|
24 |
CLASS = model.model.names
|
25 |
defaul_bot_voice = "γγ―γγγγγγγΎγ"
|
26 |
area_thres = 0.3
|
27 |
+
ZIP = False
|
28 |
app = FastAPI()
|
29 |
|
30 |
@app.get("/")
|
|
|
35 |
@app.post("/aisatsu_api/")
|
36 |
async def predict_api(
|
37 |
file: UploadFile = File(...),
|
38 |
+
# last_seen: Union[UploadFile, None] = File(None),
|
39 |
+
last_seen: Optional[str] = Form(None),
|
40 |
):
|
41 |
total_time = time.time()
|
42 |
start_time = time.time()
|
|
|
63 |
print("Get face time", time.time() - start_time)
|
64 |
start_time = time.time()
|
65 |
if last_seen is not None:
|
66 |
+
if type(last_seen) == str:
|
67 |
+
last_seen = base64_to_pil(last_seen)
|
68 |
+
else:
|
69 |
+
last_seen = read_image_file(await last_seen.read())
|
70 |
if out_img is not None:
|
71 |
diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
|
72 |
print("Hist time", time.time() - start_time)
|
|
|
76 |
voice_bot_path = tts(defaul_bot_voice, language="ja")
|
77 |
image_bot_path = pil_to_base64(out_img)
|
78 |
print("Voice time", time.time() - start_time)
|
79 |
+
if ZIP:
|
80 |
+
io = BytesIO()
|
81 |
+
zip_filename = "final_archive.zip"
|
82 |
+
with zipfile.ZipFile(io, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
|
83 |
+
for file_path in [voice_bot_path, image_bot_path]:
|
84 |
+
zf.write(file_path)
|
85 |
+
zf.close()
|
86 |
+
print("Total time", time.time() - total_time)
|
87 |
+
return StreamingResponse(
|
88 |
+
iter([io.getvalue()]),
|
89 |
+
media_type="application/x-zip-compressed",
|
90 |
+
headers={"Content-Disposition": f"attachment;filename=%s" % zip_filename}
|
91 |
+
)
|
92 |
+
else:
|
93 |
+
print("Total time", time.time() - total_time)
|
94 |
+
return {
|
95 |
+
"voice": voice_bot_path,
|
96 |
+
"image": image_bot_path
|
97 |
+
}
|
98 |
else:
|
99 |
return {"message": "No face detected"}
|