import gradio as gr
import torch
import cv2
import pytesseract
import numpy as np
from PIL import Image
import sys
import os
# Add YOLOv10 repo to path
sys.path.append('./YOLOv10') # adjust path if needed
from models.common import DetectMultiBackend  # YOLOv10 model loader
from utils.general import non_max_suppression  # assumes the repo ships YOLOv5-style post-processing utils
# Load model
model = DetectMultiBackend('best(3).pt', device=torch.device('cpu'))  # your trained YOLOv10n model path
model.eval()
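# Note (assumption): a YOLOv5-style DetectMultiBackend picks the backend from the
# weights extension ('.pt' -> PyTorch); keep 'best(3).pt' next to this script or
# adjust the path above.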
# Frame processing function
def process_frame(frame):
    h, w = frame.shape[:2]
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Resize to a stride-compatible input size (640x640 assumed here, the usual
    # YOLO default); detections are scaled back to the original frame below
    img = cv2.resize(img, (640, 640))
    img_tensor = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
    img_tensor = img_tensor.unsqueeze(0)
    with torch.no_grad():
        results = model(img_tensor, augment=False)
    detections = non_max_suppression(results)[0]
    extracted_texts = []
    confidences = []
    for det in detections:
        x1, y1, x2, y2, conf, cls = det.tolist()
        if conf > 0.5:
            # Scale box coordinates from 640x640 back to the original frame
            x1, x2 = int(x1 * w / 640), int(x2 * w / 640)
            y1, y2 = int(y1 * h / 640), int(y2 * h / 640)
            cls = int(cls)
            label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
            label = label_map.get(cls, "Unknown")
            percent = f"{conf * 100:.2f}%"
            # Draw box & label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, f"{label}: {percent}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            # OCR: "--psm 6" treats the crop as one uniform text block; "-l ben"
            # selects Bengali (requires the tesseract-ocr-ben language data)
            lp_crop = frame[y1:y2, x1:x2]
            if lp_crop.size == 0:
                continue
            gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
            extracted_texts.append(text.strip())
            confidences.append(percent)
    return frame, "\n".join(extracted_texts), ", ".join(confidences)
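# Optional tweak (untested sketch): Bangla OCR on small plate crops often benefits
# from binarizing the grayscale crop before Tesseract, e.g. Otsu thresholding:
#   _, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)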
# Input handler
def process_input(input_file):
    file_path = input_file  # gr.File(type="filepath") passes the path as a string
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        ret, frame = cap.read()
        cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""
    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence
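# Note: only the first frame of an uploaded video is processed; looping
# cap.read() until it returns False would extend this to whole clips.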
# Gradio Interface
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU)."
)
interface.launch()
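# Run with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860 by default.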