import os

import cv2
import easyocr
import gradio as gr
from PIL import Image
from ultralytics import YOLO

# YOLO detector trained for license plates (classes below).
model = YOLO("/home/user/app/best.pt")

# Class-id -> human-readable label for the detector's three classes.
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

# EasyOCR reader for Bengali script (model download happens once, at startup).
reader = easyocr.Reader(['bn'])


def annotate_frame(frame, texts=None):
    """Run plate detection + OCR on one BGR frame; return an annotated RGB image.

    Args:
        frame: BGR image (numpy array); it is resized to 640x640 internally.
        texts: optional list — when provided, recognized ``(text, confidence)``
            pairs are appended so the caller can report actual OCR results.

    Returns:
        640x640 RGB numpy array with boxes, class labels and OCR text drawn on.
    """
    input_img = cv2.resize(frame, (640, 640))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    for det in detections:
        if len(det) < 6:
            continue
        x1, y1, x2, y2, conf, cls = det
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw detection box and class label.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR on the detected region. Crop from the *resized* image so the
        # box coordinates (which are in 640x640 space) always line up,
        # regardless of the caller's original frame size.
        cropped = input_img[y1:y2, x1:x2]
        if cropped.size > 0:
            for i, (_, text, ocr_conf) in enumerate(reader.readtext(cropped)):
                text = text.strip()
                if texts is not None:
                    texts.append((text, ocr_conf))
                # NOTE(review): cv2.putText cannot shape Bengali glyphs —
                # complex scripts may render as '?'. Proper rendering would
                # need a PIL/FreeType draw; text is still returned via `texts`.
                cv2.putText(input_img, text, (x1, y2 + 20 + i * 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)


def _summarize(texts):
    """Format collected (text, conf) pairs into the two textbox outputs."""
    joined = ", ".join(t for t, _ in texts) or "No text detected"
    confs = ", ".join(f"{c * 100:.1f}%" for _, c in texts)
    return joined, confs


def process_input(input_file):
    """Handle an uploaded image or video file.

    Returns:
        4-tuple matching the Gradio outputs:
        (video_path or None, PIL image or None, detected_text, confidences).
    """
    # gr.File(type="filepath") passes a plain path string; older Gradio
    # versions pass a tempfile-like object with a .name attribute — accept both.
    file_path = input_file if isinstance(input_file, str) else input_file.name
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in ('.mp4', '.avi', '.mov'):
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            return None, None, "Could not open video file", ""

        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # some containers report 0
        frame_skip = 5  # process every 5th frame to keep runtime reasonable
        output_path = "annotated_output.mp4"
        # Only every frame_skip-th frame is written, so the writer's fps is
        # divided by frame_skip to keep the output duration close to the
        # original (writing skipped frames at the source fps plays ~5x fast).
        out = cv2.VideoWriter(output_path,
                              cv2.VideoWriter_fourcc(*'mp4v'),
                              max(fps / frame_skip, 1.0), (640, 640))
        texts = []
        frame_id = 0
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_id % frame_skip == 0:
                    frame = cv2.resize(frame, (640, 640))
                    annotated = annotate_frame(frame, texts)
                    # annotate_frame returns RGB; VideoWriter expects BGR.
                    out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
                frame_id += 1
        finally:
            # Release handles even if detection/OCR raises mid-video.
            cap.release()
            out.release()

        detected, confs = _summarize(texts)
        return output_path, None, detected, confs

    # Image branch.
    frame = cv2.imread(file_path)
    if frame is None:
        return None, None, "Invalid image", ""
    frame = cv2.resize(frame, (640, 640))
    texts = []
    annotated = annotate_frame(frame, texts)
    pil_img = Image.fromarray(annotated)
    detected, confs = _summarize(texts)
    return None, pil_img, detected, confs


interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=[
        gr.Video(label="Output Video"),
        gr.Image(type="pil", label="Output Image"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description=("Upload an image or video. Detects license plates and "
                 "extracts Bangla text using EasyOCR."),
)

if __name__ == "__main__":
    interface.launch()