Spaces:

SakibHasan
/

license_plate_classifier2

Running

File size: 2,605 Bytes

dd2ba72
 
 
1f353b4
 
581d1f5
4735088
9efaee0
78753d4
307c8f3
1dd705c
307c8f3
 
d85faf4
581d1f5
 
 
1f353b4
307c8f3
 
 
 
dd2ba72
1f353b4
 
 
e51c033
307c8f3
 
 
 
 
 
 
 
 
 
 
 
 
581d1f5
307c8f3
 
581d1f5
 
 
 
1f353b4
dd2ba72
307c8f3
 
 
 
 
dd2ba72
e51c033
1f353b4
 
 
 
 
 
 
 
 
 
 
 
 
dd2ba72
307c8f3
 
dd2ba72
1f353b4
 
28eb4e5
1f353b4
 
 
 
 
 
581d1f5
1f353b4
dd2ba72

import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR  # Import PaddleOCR
from ultralytics import YOLO

# YOLO detector, loaded once at import time from a fixed absolute weights path.
model = YOLO("/home/user/app/best.pt")

# Class index predicted by the detector -> label drawn on the annotated image.
label_map = {
    0: "Analog",
    1: "Digital",
    2: "Non-LP",
}

# Shared PaddleOCR engine: Bangla ('bn') language, angle classifier enabled.
ocr = PaddleOCR(lang='bn', use_angle_cls=True)

def _ocr_texts(ocr_result):
    """Return the recognized strings contained in a PaddleOCR result."""
    # PaddleOCR returns one entry per input image; that entry is None (or
    # empty) when no text was found in the image.
    if not ocr_result or not ocr_result[0]:
        return []

    texts = []
    for line in ocr_result[0]:
        recognized = line[1]
        # Each line is [box, (text, score)]; keep only the text so the
        # caller can join plain strings.
        if isinstance(recognized, (tuple, list)):
            recognized = recognized[0]
        texts.append(str(recognized))
    return texts


def process_frame(frame):
    """Detect plates in a frame, annotate them and OCR each detected region.

    Detection runs on a 640x640 resized copy of ``frame``; the boxes and
    labels are drawn on that copy. For OCR, every box is mapped back to the
    coordinates of the original ``frame`` so the crop covers the same region
    at the original resolution.

    Args:
        frame: BGR image array as produced by ``cv2.imread`` or
            ``cv2.VideoCapture.read``.

    Returns:
        A tuple ``(annotated, text, confidences)`` where ``annotated`` is a
        PIL RGB image of the 640x640 annotated copy, ``text`` is the
        recognized text with one OCR line per row, and ``confidences`` is a
        comma-separated list of detection confidences (as percentages) for
        the detections that produced a non-empty crop.
    """
    yolo_size = 640
    frame_h, frame_w = frame.shape[:2]

    # Resize to YOLO input shape
    input_img = cv2.resize(frame, (yolo_size, yolo_size))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    # Factors that map coordinates in the resized image back to the frame.
    scale_x = frame_w / yolo_size
    scale_y = frame_h / yolo_size

    extracted_texts = []
    confidences = []

    for det in detections:
        if len(det) < 6:
            continue

        x1, y1, x2, y2, conf, cls = det[:6]
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label on the resized image
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Map the box to the original frame and clamp it to the image so a
        # negative coordinate cannot wrap around when slicing.
        left = min(max(int(x1 * scale_x), 0), frame_w)
        right = min(max(int(x2 * scale_x), 0), frame_w)
        top = min(max(int(y1 * scale_y), 0), frame_h)
        bottom = min(max(int(y2 * scale_y), 0), frame_h)

        # OCR on the full-resolution crop of the original frame
        cropped = frame[top:bottom, left:right]
        if cropped.size > 0:
            result = ocr.ocr(cropped, cls=True)
            extracted_texts.extend(_ocr_texts(result))
            confidences.append(percent)

    # Convert to PIL
    annotated = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(annotated)

    return pil_img, "\n".join(extracted_texts), ", ".join(confidences)


def process_input(input_file):
    """Load an uploaded image or video and run plate detection on one frame.

    For videos (``.mp4``, ``.avi``, ``.mov``; matched case-insensitively)
    only the first frame is processed. Any other file is read as an image.

    Args:
        input_file: The upload handed over by the UI: either a path string
            or an object exposing the path as ``.name``. ``None`` means no
            file was provided.

    Returns:
        The ``(image, text, confidences)`` tuple from ``process_frame``, or
        ``(None, message, "")`` when there is no input or it cannot be read.
    """
    if input_file is None:
        return None, "No file provided", ""

    # Accept both a plain path string and a file wrapper with a `.name` path.
    if isinstance(input_file, str):
        file_path = str(input_file)
    else:
        file_path = input_file.name

    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        try:
            ret, frame = cap.read()
        finally:
            # Always free the capture handle, even if reading raises.
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    return process_frame(frame)


# Web UI: one file upload in; annotated image, OCR text and confidences out.
interface = gr.Interface(
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using PaddleOCR.",
    fn=process_input,
    inputs=gr.File(label="Upload Image or Video", type="filepath"),
    outputs=[
        gr.Image(label="Detected Output", type="pil"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
)

# Start serving the app.
interface.launch()