import sys
from pathlib import Path

import cv2
import gradio as gr
import pytesseract
import torch
import torch.serialization
from PIL import Image

# Add the local yolov10 repo to the Python path so its bundled
# ultralytics package (which defines the YOLOv10 classes) is imported
sys.path.append(str(Path(__file__).resolve().parent / "yolov10"))

from ultralytics import YOLO
from ultralytics.nn.tasks import YOLOv10DetectionModel

# Allowlist the model class so torch.load can safely unpickle the
# checkpoint; add_safe_globals expects a list of allowlisted classes
torch.serialization.add_safe_globals([YOLOv10DetectionModel])

# Load the trained YOLOv10n model
model = YOLO('/home/user/app/best.pt')  # path to your trained weights
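
# Optional sanity check, assuming Tesseract >= 4 with the Bangla language
# pack installed (e.g. `apt-get install tesseract-ocr-ben`);
# get_languages() lists the language codes Tesseract can load
if "ben" not in pytesseract.get_languages(config=""):
    raise RuntimeError("Tesseract 'ben' (Bangla) language data not found")
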
# Frame processing function
def process_frame(frame):
    # Ultralytics handles resizing and normalisation internally, so the BGR
    # frame can be passed as-is; YOLOv10 is NMS-free, so no separate NMS step
    results = model(frame, verbose=False)[0]

    extracted_texts = []
    confidences = []

    # Each row of boxes.data is (x1, y1, x2, y2, conf, cls)
    for det in results.boxes.data.tolist():
        x1, y1, x2, y2, conf, cls = det
        if conf > 0.5:
            x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
            cls = int(cls)

            label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
            label = label_map.get(cls, "Unknown")
            percent = f"{conf * 100:.2f}%"

            # Crop the plate before drawing, so the box lines
            # don't bleed into the OCR input
            lp_crop = frame[y1:y2, x1:x2].copy()

            # Draw box & label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, f"{label}: {percent}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # OCR: --psm 6 assumes a single uniform block of text, -l ben selects Bangla
            gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
            extracted_texts.append(text.strip())
            confidences.append(percent)

    return frame, "\n".join(extracted_texts), ", ".join(confidences)

# Input handler; receives a file path string from gr.File(type="filepath")
def process_input(file_path):
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        # For video input, only the first frame is processed
        cap = cv2.VideoCapture(file_path)
        ret, frame = cap.read()
        cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence
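
# Quick local smoke test (hypothetical sample path, not part of the app):
# img, text, conf = process_input("samples/plate.jpg")
# print(text, conf)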

# Gradio Interface
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU)."
)

interface.launch()