Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,68 +2,85 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import cv2
|
4 |
import pytesseract
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
#
|
7 |
-
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
output_video = "output.mp4"
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
|
20 |
-
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
# Draw the bounding box on the frame
|
36 |
-
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
label = "Non-License Plate"
|
45 |
-
else:
|
46 |
-
label = "Unknown"
|
47 |
-
|
48 |
-
# Draw label and confidence on frame
|
49 |
-
cv2.putText(frame, f"{label}: {conf:.2f}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
50 |
|
51 |
-
|
52 |
-
license_plate = frame[y1:y2, x1:x2]
|
53 |
-
# Convert to grayscale for better OCR results
|
54 |
-
gray_license_plate = cv2.cvtColor(license_plate, cv2.COLOR_BGR2GRAY)
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
|
61 |
-
|
|
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
# Create Gradio Interface
|
68 |
-
interface = gr.Interface(fn=process_video, inputs=gr.inputs.Video(), outputs=gr.outputs.Video())
|
69 |
interface.launch()
|
|
|
2 |
import torch
|
3 |
import cv2
|
4 |
import pytesseract
|
5 |
+
import numpy as np
|
6 |
+
from PIL import Image
|
7 |
+
import sys
|
8 |
+
import os
|
9 |
|
10 |
+
# The detector class is imported from a local YOLOv10 checkout rather than an
# installed package, so that directory has to be on sys.path before the import
# below can resolve.
sys.path.append('./YOLOv10')  # relative to the working directory; adjust if needed
from models.common import DetectMultiBackend  # noqa: E402  (must follow the path tweak)

# Load the trained YOLOv10n weights once, at import time, on the CPU.
model = DetectMultiBackend(
    'best(3).pt',  # path to the trained weights file
    device='cpu',
)
# NOTE(review): presumably the standard torch ``eval()`` switch to inference
# behaviour - confirm against the checkout's DetectMultiBackend.
model.eval()
|
|
|
17 |
|
18 |
+
# Frame processing function
|
19 |
+
def process_frame(frame):
    """Detect objects in a BGR frame, annotate it, and OCR every detection.

    Args:
        frame: Image array in OpenCV BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before inference). The array is annotated
            in place.

    Returns:
        A ``(frame, texts, confidences)`` tuple: the same frame object with
        boxes and labels drawn on it, the stripped OCR strings joined by
        newlines, and the confidence percentages joined by ``", "``.
    """
    # Class index -> display label; anything else is shown as "Unknown".
    label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}

    frame_h, frame_w = frame.shape[:2]
    # OCR reads from an untouched copy so that boxes and labels drawn for this
    # or an earlier detection never end up inside a crop.
    pristine = frame.copy()

    # HWC uint8 BGR -> 1xCxHxW float RGB scaled to [0, 1].
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_tensor = torch.from_numpy(rgb).permute(2, 0, 1).float() / 255.0
    img_tensor = img_tensor.unsqueeze(0)

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        results = model(img_tensor, augment=False)
        # NOTE(review): relies on the loaded model exposing an ``nms`` method;
        # confirm against the YOLOv10 checkout in use.
        results = model.nms(results)[0]

    extracted_texts = []
    confidences = []

    for det in results:
        x1, y1, x2, y2, conf, cls = det.tolist()
        # Keep only detections above the 0.5 confidence threshold.
        if not conf > 0.5:
            continue

        # Clamp the box to the frame: negative coordinates would otherwise
        # wrap around when slicing, and an empty crop makes cvtColor raise.
        x1 = min(max(int(x1), 0), frame_w)
        x2 = min(max(int(x2), 0), frame_w)
        y1 = min(max(int(y1), 0), frame_h)
        y2 = min(max(int(y2), 0), frame_h)
        if x2 <= x1 or y2 <= y1:
            continue

        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box & label on the frame that is returned to the caller.
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(frame, f"{label}: {percent}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR on the clean grayscale crop using Tesseract's "ben" language data.
        gray = cv2.cvtColor(pristine[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
        text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
        extracted_texts.append(text.strip())
        confidences.append(percent)

    return frame, "\n".join(extracted_texts), ", ".join(confidences)
|
|
|
|
|
|
|
53 |
|
54 |
+
# Input handler
|
55 |
+
def process_input(input_file):
    """Run detection and OCR on an uploaded image, or on a video's first frame.

    Args:
        input_file: The uploaded file, either as an object exposing a
            ``.name`` path attribute or as a plain path string. ``None``
            (nothing uploaded) is reported instead of raising.

    Returns:
        A ``(image, text, confidence)`` tuple. ``image`` is a PIL image of the
        annotated frame, or ``None`` when the input could not be read, in
        which case ``text`` carries the error message and ``confidence`` is
        an empty string.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    # Accept both an object with ``.name`` and a bare path string.
    file_path = str(getattr(input_file, "name", input_file))

    # Compare the extension case-insensitively so e.g. ".MP4" is still
    # treated as a video rather than handed to the image reader.
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        try:
            # Only the first frame of the video is processed.
            ret, frame = cap.read()
        finally:
            # Always free the capture handle, even if the read raises.
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    # OpenCV works in BGR; convert to RGB before wrapping in a PIL image.
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence
|
72 |
|
73 |
+
# Gradio Interface
|
74 |
+
# Single-function UI: one upload in; annotated image, OCR text and confidence
# list out (same order as the tuple returned by process_input).
interface = gr.Interface(
    fn=process_input,
    # ``type`` is deliberately left at the component default: an explicit
    # "file" value is not accepted by every Gradio release.
    # NOTE(review): verify against the Gradio version pinned for this Space.
    inputs=gr.File(label="Upload Image or Video"),
    outputs=[
        gr.Image(type="pil", label="Detected Output"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU).",
)

# Start the app when this module is executed.
interface.launch()
|