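# Gradio app: YOLO license plate detection with Bangla OCR via Tesseract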
import gradio as gr
import torch
import cv2
import pytesseract
import numpy as np
from PIL import Image
from ultralytics import YOLO

# Load the trained YOLO weights (path is the app root inside a Hugging Face Space)
model = YOLO("/home/user/app/best.pt")

# Map class indices to names; order must match the labels best.pt was trained with
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

def process_frame(frame):
    # Run inference on the original frame; Ultralytics resizes internally,
    # so the returned box coordinates stay in the frame's own pixel space
    # (the previous manual resize broke the OCR crops below).
    results = model(frame)[0]
    # Each detection row: x1, y1, x2, y2, confidence, class_id
    detections = results.boxes.data.cpu().numpy()

    # Draw on a copy so the OCR crops come from the clean frame
    annotated = frame.copy()
    extracted_texts = []
    confidences = []

    for det in detections:
        if len(det) < 6:
            continue

        x1, y1, x2, y2, conf, cls = det
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw the box and label on the annotated copy
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(annotated, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Crop the plate region from the clean frame for OCR
        cropped = frame[y1:y2, x1:x2]
        if cropped.size > 0:
            gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
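            # --psm 6 treats the crop as one uniform text block; -l ben uses
            # Tesseract's Bengali language pack (must be installed separately)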
            text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
            extracted_texts.append(text.strip())
            confidences.append(percent)

    # OpenCV images are BGR; convert to RGB before handing off to PIL
    rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb)

    return pil_img, "\n".join(extracted_texts), ", ".join(confidences)


def process_input(input_file):
    if input_file is None:
        return None, "No file uploaded", ""

    # gr.File(type="filepath") passes the selected path as a plain string,
    # so there is no .name attribute to read
    file_path = input_file

    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
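        # Videos: run detection on the first frame only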
        cap = cv2.VideoCapture(file_path)
        ret, frame = cap.read()
        cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    return process_frame(frame)


interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using OCR."
)

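# launch() starts the Gradio server; Hugging Face Spaces executes this script on startup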
interface.launch()