"""Gradio app: detect licence plates with a YOLOv10 model and OCR their Bangla text."""

import os
import sys
from pathlib import Path

import gradio as gr
import torch
import cv2
import pytesseract
import numpy as np
from PIL import Image
import dill._dill
from torch.serialization import add_safe_globals, safe_globals

# Add yolov10 directory to Python path
sys.path.append(str(Path(__file__).resolve().parent / "yolov10"))

# Import your custom YOLOv10 model
from ultralytics.nn.tasks import YOLOv10DetectionModel  # noqa: E402
from ultralytics import YOLO  # noqa: E402

# Register the required classes/functions so torch's safe unpickler accepts
# the checkpoint.
_SAFE_GLOBALS = [YOLOv10DetectionModel, dill._dill._load_type]
add_safe_globals(_SAFE_GLOBALS)

# Load your trained YOLOv10 model
with safe_globals(_SAFE_GLOBALS):
    model = YOLO('/home/user/app/best.pt')

# Class index -> label drawn on the annotated image.
LABEL_MAP = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# File suffixes (lower-case) that are opened as videos instead of still images.
VIDEO_SUFFIXES = {".mp4", ".avi", ".mov"}


# Frame processing function
def process_frame(frame, conf_threshold=0.5):
    """Detect plates in one BGR frame, annotate them and OCR each detection.

    Args:
        frame: Image as a BGR ``numpy`` array (as returned by ``cv2.imread``
            or ``cv2.VideoCapture.read``). It is not modified.
        conf_threshold: Detections with confidence less than or equal to this
            value are ignored.

    Returns:
        A tuple ``(annotated, texts, confidences)`` where ``annotated`` is a
        copy of ``frame`` with boxes and labels drawn, ``texts`` is the OCR
        output of every kept detection joined by newlines, and
        ``confidences`` is the matching percentages joined by ``", "``.
    """
    # The frame is handed to the model as-is: Ultralytics accepts BGR numpy
    # arrays and does its own resizing/normalisation, and the returned
    # Results already contain the final (post-NMS) boxes.
    results = model(frame, augment=False)
    boxes = results[0].boxes

    # Draw on a copy so the OCR crops below come from clean pixels rather
    # than from an image that already has rectangles and labels on it.
    annotated = frame.copy()
    height, width = frame.shape[:2]

    extracted_texts = []
    confidences = []

    detections = zip(boxes.xyxy.tolist(), boxes.conf.tolist(), boxes.cls.tolist())
    for (x1, y1, x2, y2), conf, cls in detections:
        if conf <= conf_threshold:
            continue

        # Clamp to the image so slicing never wraps around with negatives.
        x1 = min(max(int(x1), 0), width)
        x2 = min(max(int(x2), 0), width)
        y1 = min(max(int(y1), 0), height)
        y2 = min(max(int(y2), 0), height)

        label = LABEL_MAP.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box & label; put the label below the top edge when the box
        # touches the top of the image and there is no room above it.
        label_y = y1 - 10 if y1 - 10 > 10 else y1 + 20
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(annotated, f"{label}: {percent}", (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR
        lp_crop = frame[y1:y2, x1:x2]
        if lp_crop.size:
            gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
        else:
            # Degenerate box: nothing to read, but keep the entry so texts
            # and confidences stay in step.
            text = ""

        extracted_texts.append(text.strip())
        confidences.append(percent)

    return annotated, "\n".join(extracted_texts), ", ".join(confidences)


# Input handler
def process_input(input_file):
    """Run detection on an uploaded image, or on the first frame of a video.

    Args:
        input_file: Value delivered by the ``gr.File`` input: either a path
            string, an object exposing the path as ``.name``, or ``None``
            when nothing was uploaded.

    Returns:
        A tuple ``(image, text, confidence)`` for the three output
        components. ``image`` is a PIL image, or ``None`` together with an
        error message in ``text`` when the input could not be read.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    file_path = input_file if isinstance(input_file, str) else input_file.name

    if Path(file_path).suffix.lower() in VIDEO_SUFFIXES:
        cap = cv2.VideoCapture(file_path)
        try:
            # Only the first frame of a video is analysed.
            ret, frame = cap.read()
        finally:
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence


# gr.File accepts type="file" only before Gradio 4; from 4.0 on the
# equivalent is type="filepath". Pick whichever the installed version takes.
_FILE_TYPE = "filepath" if int(gr.__version__.split(".")[0]) >= 4 else "file"

# Gradio Interface
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type=_FILE_TYPE, label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)")
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU)."
)

interface.launch()