"""Gradio app that detects license plates with a YOLO model and OCRs them.

An uploaded image (or the first frame of an uploaded video) is run through
the detector; every detection is drawn on a 640x640 preview and the matching
region of the full-resolution frame is passed to Tesseract with the Bengali
(``ben``) language pack.
"""

from pathlib import Path

import cv2
import gradio as gr
import numpy as np  # not referenced below; kept because the original file imports it
import pytesseract
import torch  # not referenced below; kept because the original file imports it
from PIL import Image
from ultralytics import YOLO

# Load model
model = YOLO("/home/user/app/best.pt")

# Label map: detector class index -> human-readable label.
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# Side length (pixels) of the square image handed to the detector.
YOLO_INPUT_SIZE = 640

# File suffixes (lower-case) that are opened as video instead of as an image.
VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov"}


def _clamp(value, low, high):
    """Return ``value`` limited to the inclusive range ``[low, high]``."""
    return max(low, min(value, high))


def process_frame(frame):
    """Detect plates in ``frame``, annotate a preview and OCR each detection.

    Args:
        frame: BGR image array as returned by ``cv2.imread`` or
            ``cv2.VideoCapture.read``.

    Returns:
        A 3-tuple ``(pil_img, texts, confidences)``:
        ``pil_img`` is the 640x640 RGB preview with boxes and labels drawn,
        ``texts`` is the OCR output of every non-empty crop joined by
        newlines, and ``confidences`` is the matching confidence percentages
        joined by ``", "``.
    """
    orig_h, orig_w = frame.shape[:2]

    # Resize to YOLO input shape
    input_img = cv2.resize(frame, (YOLO_INPUT_SIZE, YOLO_INPUT_SIZE))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    # Boxes are expressed in the resized image's coordinates; these factors
    # map them back onto the original frame for cropping.
    scale_x = orig_w / YOLO_INPUT_SIZE
    scale_y = orig_h / YOLO_INPUT_SIZE

    extracted_texts = []
    confidences = []

    for det in detections:
        if len(det) < 6:
            continue

        bx1, by1, bx2, by2, conf, cls = det
        x1, y1, x2, y2 = (int(v) for v in (bx1, by1, bx2, by2))
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label on the resized preview image.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # Keep the label baseline inside the image for boxes near the top edge.
        label_y = max(y1 - 10, 15)
        cv2.putText(
            input_img,
            f"{label}: {percent}",
            (x1, label_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
        )

        # OCR on the original frame: scale the box back to full resolution
        # and clamp it so negative values cannot wrap around when slicing.
        cx1 = _clamp(int(bx1 * scale_x), 0, orig_w)
        cx2 = _clamp(int(bx2 * scale_x), 0, orig_w)
        cy1 = _clamp(int(by1 * scale_y), 0, orig_h)
        cy2 = _clamp(int(by2 * scale_y), 0, orig_h)
        cropped = frame[cy1:cy2, cx1:cx2]

        if cropped.size > 0:
            gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
            extracted_texts.append(text.strip())
            confidences.append(percent)

    # Convert to PIL (OpenCV images are BGR, PIL expects RGB).
    annotated = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(annotated)

    return pil_img, "\n".join(extracted_texts), ", ".join(confidences)


def _read_first_frame(file_path):
    """Return the first frame of the video at ``file_path``, or ``None``."""
    cap = cv2.VideoCapture(file_path)
    try:
        ret, frame = cap.read()
    finally:
        # Always release the capture handle, even if reading raised.
        cap.release()
    return frame if ret else None


def process_input(input_file):
    """Gradio callback: load the uploaded image/video and run detection.

    Args:
        input_file: The uploaded file. ``gr.File(type="filepath")`` passes a
            path string; objects exposing the path via ``.name`` are accepted
            as well. ``None`` means nothing was uploaded.

    Returns:
        The ``(image, text, confidences)`` tuple from ``process_frame``, or
        ``(None, <message>, "")`` when the input cannot be read. For videos
        only the first frame is processed.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    # A Path's ``.name`` is only the basename, so treat str/Path as the full
    # path and fall back to ``.name`` for file-like wrappers.
    if isinstance(input_file, (str, Path)):
        file_path = str(input_file)
    else:
        file_path = input_file.name

    # Compare the suffix case-insensitively so e.g. ".MP4" is still a video.
    if Path(file_path).suffix.lower() in VIDEO_EXTENSIONS:
        frame = _read_first_frame(file_path)
        if frame is None:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    return process_frame(frame)


interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description=(
        "Upload an image or video. "
        "Detects license plates and extracts Bangla text using OCR."
    ),
)

# Start the web server only when run as a script, so importing this module
# (e.g. to reuse process_frame) does not block on the Gradio server.
if __name__ == "__main__":
    interface.launch()