File size: 3,332 Bytes
dd2ba72 2405743 1f353b4 2405743 1f353b4 4735088 2405743 9efaee0 2405743 307c8f3 1dd705c 307c8f3 d85faf4 2405743 581d1f5 2405743 307c8f3 dd2ba72 e51c033 307c8f3 2405743 307c8f3 2405743 307c8f3 2405743 dd2ba72 2405743 e51c033 1f353b4 2405743 1f353b4 2405743 1f353b4 58a562d 07eed04 58a562d 2405743 07eed04 2405743 58a562d 2405743 07eed04 2405743 1f353b4 2405743 07eed04 2405743 1f353b4 07eed04 dd2ba72 07eed04 2405743 07eed04 307c8f3 dd2ba72 1f353b4 28eb4e5 1f353b4 f84d408 1f353b4 2405743 1f353b4 dd2ba72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import cv2
import easyocr
import numpy as np
import os
from PIL import Image
from ultralytics import YOLO
from datetime import datetime
# Load YOLO detection model from a hard-coded path
# (Hugging Face Spaces layout — the weights file must exist at this location).
model = YOLO("/home/user/app/best.pt")
# Map YOLO class indices to human-readable plate categories.
label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
# EasyOCR reader for Bengali ('bn') text; model download happens on first use.
reader = easyocr.Reader(['bn'])
def annotate_frame(frame):
    """Detect license plates in a frame, draw boxes/labels, and OCR each plate.

    Parameters
    ----------
    frame : numpy.ndarray
        BGR image of any size; it is resized to 640x640 for inference.

    Returns
    -------
    numpy.ndarray
        The annotated 640x640 image converted to RGB (for PIL/Gradio display).
    """
    input_img = cv2.resize(frame, (640, 640))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()
    h, w = input_img.shape[:2]

    for det in detections:
        # Each detection row is (x1, y1, x2, y2, conf, cls); skip malformed rows.
        if len(det) < 6:
            continue
        x1, y1, x2, y2, conf, cls = det
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # BUG FIX: crop from the resized image the box coordinates refer to,
        # not the original `frame` (which may be a different size). Crop BEFORE
        # drawing so the rectangle border doesn't pollute the OCR input, and
        # clamp coordinates so boxes touching the edges still slice correctly.
        x1c, y1c = max(0, x1), max(0, y1)
        x2c, y2c = min(w, x2), min(h, y2)
        cropped = input_img[y1c:y2c, x1c:x2c].copy()

        # Draw detection box and class label with confidence.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR the plate crop and stack recognized lines below the box.
        if cropped.size > 0:
            for i, item in enumerate(reader.readtext(cropped)):
                text = item[1].strip()
                # NOTE(review): cv2.putText has no Bengali glyph support, so
                # the text will likely render as '?' — consider drawing via
                # PIL ImageDraw with a Bengali font instead.
                cv2.putText(input_img, text, (x1, y2 + 20 + i * 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
def process_input(input_file):
    """Annotate an uploaded image or video with plate detections + Bangla OCR.

    Parameters
    ----------
    input_file : str or file-like
        Path to the uploaded file. `gr.File(type="filepath")` passes a plain
        string; older Gradio versions pass a tempfile object with `.name`.

    Returns
    -------
    tuple
        (video_path_or_None, pil_image_or_None, text_message, confidence_message)
        matching the four Gradio output components.
    """
    # BUG FIX: with type="filepath" the input is a str, which has no `.name`
    # attribute — accept both the string and the legacy file-object form.
    file_path = input_file if isinstance(input_file, str) else input_file.name
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in ('.mp4', '.avi', '.mov'):
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            return None, None, "Could not open video file", ""

        # Some containers report fps == 0; fall back to a sane default.
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        frame_skip = 5  # process every 5th frame to keep OCR cost manageable
        output_path = "annotated_output.mp4"
        # BUG FIX: only 1 in `frame_skip` frames is written, so the output
        # must use a proportionally lower fps or it plays frame_skip× too fast.
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                              max(fps / frame_skip, 1.0), (640, 640))
        try:
            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_id % frame_skip == 0:
                    annotated = annotate_frame(cv2.resize(frame, (640, 640)))
                    # annotate_frame returns RGB; VideoWriter expects BGR.
                    out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
                frame_id += 1
        finally:
            # Release handles even if annotation raises mid-video.
            cap.release()
            out.release()
        return output_path, None, "Bangla text in video (see frames)", "OCR confidence displayed"

    # Anything that isn't a known video extension is treated as an image.
    frame = cv2.imread(file_path)
    if frame is None:
        return None, None, "Invalid image", ""
    annotated = annotate_frame(cv2.resize(frame, (640, 640)))
    pil_img = Image.fromarray(annotated)
    return None, pil_img, "Bangla text in image", "OCR confidence in image"
# Wire the Gradio UI: one file upload fans out into four result components
# (annotated video, annotated image, detected text, OCR confidence).
output_components = [
    gr.Video(label="Output Video"),
    gr.Image(type="pil", label="Output Image"),
    gr.Textbox(label="Detected Text (Bangla)"),
    gr.Textbox(label="Confidence (%)"),
]

interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    outputs=output_components,
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using EasyOCR.",
)

interface.launch()
|