Spaces:

Detomo
/

Aisatsu-robot

Sleeping

App Files Files Community

vumichien committed on Mar 23, 2023

Commit

443cd8b

1 Parent(s): 48cacbd

Create app.py

Browse files

Files changed (1) hide show

app.py +107 -0

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import gradio as gr
+import torch
+from ultralyticsplus import YOLO
+import numpy as np
+from sahi.prediction import ObjectPrediction, PredictionScore
+from sahi.utils.cv import (
+    get_bool_mask_from_coco_segmentation,
+    read_image_as_pil,
+    visualize_object_predictions,
+)
+from base64 import b64encode
+from io import BytesIO
+from gtts import gTTS
+from mtranslate import translate
+from speech_recognition import AudioFile, Recognizer
+import time
+# Load the detector once at import time so every inference call reuses it.
+# NOTE(review): 'ultralyticsplus/yolov8s' is presumably a hub model id
+# resolved by ultralyticsplus - confirm where the weights are fetched from.
+model = YOLO('ultralyticsplus/yolov8s')
+# Class-name lookup taken from the loaded model; yolov8_inference indexes it
+# with the integer class id of each detection.
+CLASS = model.model.names
+def tts(text: str, language="ja") -> object:
+    """Converts text into autoplay html.
+    Args:
+        text (str): generated answer of bot
+    Returns:
+        html: autoplay object
+    """
+    tts_object = gTTS(text=text, lang=language, slow=False)
+    bytes_object = BytesIO()
+    tts_object.write_to_fp(bytes_object)
+    bytes_object.seek(0)
+    b64 = b64encode(bytes_object.getvalue()).decode()
+    html = f"""
+    <audio controls autoplay>
+    <source src="data:audio/wav;base64,{b64}" type="audio/wav">
+    </audio>
+    """
+    return html
+def yolov8_inference(
+    image,
+    area_thres=0.2,
+    defaul_bot_voice="おはいようございます"
+):
+    """
+    YOLOv8 inference function
+    Args:
+        image: Input image
+    Returns:
+        Rendered image
+    """
+    time.sleep(2)
+    # set model parameters
+    model.overrides['conf'] = 0.25  # NMS confidence threshold
+    model.overrides['iou'] = 0.45  # NMS IoU threshold
+    model.overrides['agnostic_nms'] = False  # NMS class-agnostic
+    model.overrides['max_det'] = 1000  # maximum number of detections per image
+    results = model.predict(image, show=False)[0]
+    image = read_image_as_pil(image)
+    np_image = np.ascontiguousarray(image)
+    masks, boxes = results.masks, results.boxes
+    area_image = image.width*image.height
+    object_predictions = []
+    html_bot_voice = ""
+    if boxes is not None:
+        det_ind = 0
+        for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
+            if int(cls) != 0:
+                continue
+            box = xyxy.tolist()
+            area_rate = (box[2] - box[0]) * (box[3] - box[1]) / area_image
+            if area_rate >= area_thres:
+                object_prediction = ObjectPrediction(
+                    bbox=box,
+                    category_name=CLASS[int(cls)],
+                    category_id=int(cls),
+                    score=area_rate,
+                )
+                object_predictions.append(object_prediction)
+                det_ind += 1
+                html_bot_voice = tts(defaul_bot_voice, language="ja")
+    result = visualize_object_predictions(
+        image=np_image,
+        object_prediction_list=object_predictions,
+        rect_th=2,
+        text_th=2,
+    )
+    return Image.fromarray(result["image"]), html_bot_voice
+outputs = [gr.Image(type="filepath", label="Output Image"),
+           gr.HTML()]
+title = "State-of-the-Art YOLO Models for Object detection"
+demo_app = gr.Interface(
+    fn=yolov8_inference,
+    inputs=gr.Image(source="webcam", streaming=True, label="Input Image"),
+    outputs=outputs,
+    title=title,
+    live=True,
+)
+demo_app.launch(debug=True)