import gradio as gr
import torch
import cv2
import pytesseract
from PIL import Image
import sys

# Add the YOLOv5 repo to the import path (DetectMultiBackend and
# non_max_suppression live in that codebase); adjust the path if needed
sys.path.append('/home/user/app/YOLOv5')

from models.common import DetectMultiBackend
from utils.general import non_max_suppression

IMG_SIZE = 640  # model input resolution; detections are scaled back below

# Load the trained model on CPU
model = DetectMultiBackend('best(3).pt', device='cpu')
model.eval()

# Frame processing: detect plates, draw boxes, and OCR each crop
def process_frame(frame):
    h0, w0 = frame.shape[:2]
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Plain resize (no letterboxing) keeps the coordinate mapping simple
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img_tensor = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
    img_tensor = img_tensor.unsqueeze(0)

    with torch.no_grad():
        pred = model(img_tensor, augment=False)
    # DetectMultiBackend has no .nms() method; apply NMS explicitly.
    # conf_thres=0.5 replaces the per-detection confidence check.
    detections = non_max_suppression(pred, conf_thres=0.5)[0]

    label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
    extracted_texts = []
    confidences = []

    for det in detections:
        x1, y1, x2, y2, conf, cls = det.tolist()
        # Scale coordinates from the model input back to the original frame
        x1, x2 = int(x1 * w0 / IMG_SIZE), int(x2 * w0 / IMG_SIZE)
        y1, y2 = int(y1 * h0 / IMG_SIZE), int(y2 * h0 / IMG_SIZE)
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box & label
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(frame, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR the plate crop (requires Tesseract's Bangla language pack, "ben")
        lp_crop = frame[max(0, y1):y2, max(0, x1):x2]
        if lp_crop.size == 0:
            continue
        gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
        text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
        extracted_texts.append(text.strip())
        confidences.append(percent)

    return frame, "\n".join(extracted_texts), ", ".join(confidences)

# Input handler: for videos, only the first frame is processed
def process_input(file_path):
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        ret, frame = cap.read()
        cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence

# Gradio interface (type="filepath" hands the handler a path string,
# unlike the deprecated type="file")
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video (first frame only). Detects plates and extracts Bangla text using OCR (CPU)."
)

interface.launch()