"""Gradio app: detect license plates with YOLOv10 and OCR their Bangla text."""

import os
import sys
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import pytesseract
import torch
from PIL import Image
from torch.serialization import add_safe_globals, safe_globals

# Add yolov10 directory to Python path. This has to happen before the
# ultralytics imports below, which is why they are not grouped with the
# other imports.
sys.path.append(str(Path(__file__).resolve().parent / "yolov10"))

# Now import from yolov10
from ultralytics import YOLO  # noqa: E402
from ultralytics.nn.tasks import YOLOv10DetectionModel  # noqa: E402

# Register the actual YOLOv10DetectionModel class for safe loading
add_safe_globals([YOLOv10DetectionModel])

# Load the model safely using context manager
with safe_globals([YOLOv10DetectionModel]):
    model = YOLO('/home/user/app/best.pt')  # Make sure this file exists

# Class index -> human-readable label drawn on the frame.
LABEL_MAP = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# Detections at or below this confidence are ignored.
CONF_THRESHOLD = 0.5

# File extensions (lower-case) that are treated as video input.
VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov')


# Frame processing function
def process_frame(frame):
    """Detect plates in a frame, annotate it, and OCR every detection.

    Args:
        frame: BGR image as a numpy array (what ``cv2.imread`` /
            ``VideoCapture.read`` return). It is annotated in place.

    Returns:
        A ``(frame, text, confidences)`` tuple: the annotated frame, the OCR
        results joined with newlines, and the confidence percentages joined
        with ``", "``.
    """
    height, width = frame.shape[:2]
    # OCR must read pixels that have not been drawn on, so keep a clean copy.
    clean = frame.copy()

    # Pass the BGR array straight to the model and let it do its own
    # pre-processing; predictions come back as a list of Results objects,
    # one per input image.
    results = model(frame, augment=False)
    boxes = results[0].boxes if results else None
    # Each row is [x1, y1, x2, y2, ..., conf, cls].
    detections = boxes.data.tolist() if boxes is not None else []

    extracted_texts = []
    confidences = []

    for det in detections:
        conf, cls = det[-2], det[-1]
        if conf <= CONF_THRESHOLD:
            continue

        # Clamp to the image so slicing never wraps around via negative
        # indices and never yields an out-of-range crop.
        x1 = min(max(int(det[0]), 0), width)
        y1 = min(max(int(det[1]), 0), height)
        x2 = min(max(int(det[2]), 0), width)
        y2 = min(max(int(det[3]), 0), height)
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: nothing to draw or OCR (cvtColor would raise
            # on an empty crop).
            continue

        label = LABEL_MAP.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box & label
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(frame, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR on the untouched pixels
        lp_crop = clean[y1:y2, x1:x2]
        gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
        text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")

        extracted_texts.append(text.strip())
        confidences.append(percent)

    return frame, "\n".join(extracted_texts), ", ".join(confidences)


# Input handler
def process_input(input_file):
    """Load an uploaded image (or the first frame of a video) and process it.

    Args:
        input_file: The uploaded file as handed over by ``gr.File``: either a
            path string or an object whose ``name`` attribute is the path.
            ``None`` means nothing was uploaded.

    Returns:
        A ``(image, text, confidences)`` tuple. ``image`` is a PIL image with
        the detections drawn on it, or ``None`` together with an error
        message in ``text`` when the input could not be read.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    # Accept both a plain path and a file-like wrapper exposing `.name`.
    file_path = str(getattr(input_file, "name", input_file))

    if file_path.lower().endswith(VIDEO_EXTENSIONS):
        # Only the first frame of a video is analysed.
        cap = cv2.VideoCapture(file_path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence


# Gradio Interface
interface = gr.Interface(
    fn=process_input,
    # `type` is left at the library default on purpose: the accepted values
    # differ between Gradio major versions, and process_input copes with
    # either a path string or a file object.
    inputs=gr.File(label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU).",
)

interface.launch()