# File size: 2,605 Bytes
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR # Import PaddleOCR
from ultralytics import YOLO
# Class index -> display name for the three classes the detector reports.
label_map = {
    0: "Analog",
    1: "Digital",
    2: "Non-LP",
}

# YOLO detector, loaded once at import time from the weights file in the app directory.
model = YOLO("/home/user/app/best.pt")

# PaddleOCR reader requested for Bangla ("bn") with the angle classifier enabled.
# NOTE(review): confirm the installed PaddleOCR release actually ships a "bn" model.
ocr = PaddleOCR(use_angle_cls=True, lang="bn")
def process_frame(frame):
    """Detect license plates in a frame, annotate them, and OCR each plate.

    Args:
        frame: Image array as returned by ``cv2.imread`` or
            ``cv2.VideoCapture.read`` (height x width x channels, BGR).

    Returns:
        A 3-tuple ``(pil_img, text, confidences)`` where ``pil_img`` is the
        640x640 annotated image as a PIL image, ``text`` is every recognized
        string joined by newlines, and ``confidences`` is the detection
        confidence of each box (as ``"NN.NN%"``) joined by ``", "``.
    """
    # Detection runs on a fixed 640x640 copy, so every box returned by the
    # model is expressed in that resized coordinate system.
    input_img = cv2.resize(frame, (640, 640))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    # Factors that map resized-image coordinates back onto the original
    # frame; needed because OCR crops come from the full-resolution frame.
    frame_h, frame_w = frame.shape[:2]
    scale_x = frame_w / 640
    scale_y = frame_h / 640

    extracted_texts = []
    confidences = []
    for det in detections:
        if len(det) < 6:
            continue
        fx1, fy1, fx2, fy2, conf, cls = det
        x1, y1, x2, y2 = (int(v) for v in (fx1, fy1, fx2, fy2))
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label on the resized image. The label baseline is
        # clamped so it stays visible when the box touches the top edge.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, max(y1 - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Rescale the box to the original frame and clamp it to the image
        # bounds; negative indices would otherwise wrap around when slicing.
        cx1 = min(max(int(fx1 * scale_x), 0), frame_w)
        cx2 = min(max(int(fx2 * scale_x), 0), frame_w)
        cy1 = min(max(int(fy1 * scale_y), 0), frame_h)
        cy2 = min(max(int(fy2 * scale_y), 0), frame_h)
        cropped = frame[cy1:cy2, cx1:cx2]

        if cropped.size > 0:
            # The crop is passed to PaddleOCR as-is (no colour conversion).
            result = ocr.ocr(cropped, cls=True)
            # NOTE(review): assumes the PaddleOCR 2.x result layout, i.e. one
            # entry per image, each a list of [box, (text, score)] items, and
            # None/empty when nothing was recognized -- confirm against the
            # installed version.
            lines = result[0] if result else None
            for line in lines or []:
                extracted_texts.append(str(line[1][0]))

        confidences.append(percent)

    # Convert the annotated BGR image to RGB for PIL.
    annotated = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(annotated)
    return pil_img, "\n".join(extracted_texts), ", ".join(confidences)
def process_input(input_file):
    """Load an uploaded image or video and run plate detection on one frame.

    Args:
        input_file: The uploaded file. Either a filesystem path string (what
            ``gr.File(type="filepath")`` passes to the callback) or an
            object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        The ``(image, text, confidences)`` tuple from ``process_frame`` on
        success, or ``(None, message, "")`` when the input is missing or
        cannot be decoded. For videos only the first frame is processed.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    # A plain path string has no ``.name`` attribute, so only dereference it
    # for file-like wrapper objects.
    file_path = input_file if isinstance(input_file, str) else input_file.name

    # Lower-case before matching so extensions such as ".MP4" count as video.
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        try:
            ret, frame = cap.read()
        finally:
            # Always release the capture handle, even if read() raises.
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    return process_frame(frame)
# Widgets are built up front so the Interface call below reads as plain wiring.
_upload_widget = gr.File(type="filepath", label="Upload Image or Video")
_result_widgets = [
    gr.Image(type="pil", label="Detected Output"),
    gr.Textbox(label="Detected Text (Bangla)"),
    gr.Textbox(label="Confidence (%)"),
]

# One upload in; annotated image, recognized text and confidences out.
interface = gr.Interface(
    fn=process_input,
    inputs=_upload_widget,
    outputs=_result_widgets,
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using PaddleOCR.",
)

# Start the web UI as soon as the module is executed.
interface.launch()
|