import numpy as np
import PIL
from PIL import Image, ImageDraw
import gradio as gr
import torch
import easyocr
import os
from pathlib import Path
import cv2
#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/PleaseRepeatLouder.jpg', 'PleaseRepeatLouder.jpg')
#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/ProhibitedInWhiteHouse.JPG', 'ProhibitedInWhiteHouse.JPG')
torch.hub.download_url_to_file('https://raw.githubusercontent.com/AaronCWacker/Yggdrasil/master/images/20-Books.jpg','20-Books.jpg')
torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'COVID.png')
torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/chinese.jpg', 'chinese.jpg')
torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg')
torch.hub.download_url_to_file('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg')
def draw_boxes(image, bounds, color='yellow', width=2):
    """Draw a closed polygon around each detected text region."""
    draw = ImageDraw.Draw(image)
    for bound in bounds:
        p0, p1, p2, p3 = bound[0]  # four [x, y] corner points of the detection box
        draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
    return image
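# A minimal still-image sketch of the same flow, kept commented out so the app
# script stays side-effect free ('example.jpg' is a hypothetical local file,
# not one shipped with this Space):
#
#   reader = easyocr.Reader(['en'])
#   img = Image.open('example.jpg')
#   draw_boxes(img, reader.readtext('example.jpg'))  # readtext returns a list of
#   img.save('example_boxed.jpg')                    # (box, text, confidence) tuples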
def inference(video, lang, time_step):
    # output = f"{Path(video).stem}_detected{Path(video).suffix}"
    output = 'results.mp4'
    reader = easyocr.Reader(lang)
    bounds = []
    vidcap = cv2.VideoCapture(video)
    success, frame = vidcap.read()
    count = 0
    frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
    output_frames = []
    while success:
        # OCR one frame every `time_step` seconds; max(1, ...) guards against
        # a zero modulus when time_step is very small.
        if count % max(1, int(frame_rate * time_step)) == 0:
            bounds = reader.readtext(frame)
            # OpenCV decodes frames as BGR; convert to RGB for PIL so the drawn
            # boxes keep their intended color, then back to BGR for cv2.
            im = PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            draw_boxes(im, bounds)
            output_frames.append(cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR))
        success, frame = vidcap.read()
        count += 1
    # Reuse the input's native resolution and fps for the writer; the capture
    # properties come back as floats, so cast the dimensions to int.
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    frames_total = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Define the codec and create the VideoWriter object.
    temp = f"{Path(output).stem}_temp{Path(output).suffix}"
    output_video = cv2.VideoWriter(
        temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )
    # output_video = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in output_frames:
        output_video.write(frame)
    output_video.release()
    vidcap.release()
    # Two-pass libx264 re-encode for smaller size and web compatibility.
    os.system(
        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && "
        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
    )
    os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
    return output
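# Hypothetical local smoke test (kept commented out; 'sample.mp4' is a
# placeholder path, and the Space itself only calls inference() through the
# Gradio interface below):
#
#   annotated = inference('sample.mp4', ['en'], 1.0)
#   print(annotated)  # -> 'results.mp4'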
title = '🖼️ Video to Multilingual OCR 👁️ Gradio'
description = 'Multilingual OCR that works conveniently on all devices, drawing boxes around the text detected in sampled video frames.'
article = "<p style='text-align: center'></p>"
# Note: these example files are still images, while the interface below takes
# video input, which is presumably why `examples=` is left commented out there.
examples = [
    # ['PleaseRepeatLouder.jpg', ['ja']], ['ProhibitedInWhiteHouse.JPG', ['en']], ['BeautyIsTruthTruthisBeauty.JPG', ['en']],
    ['20-Books.jpg', ['en']],
    ['COVID.png', ['en']],
    ['chinese.jpg', ['ch_sim', 'en']],
    ['japanese.jpg', ['ja', 'en']],
    ['Hindi.jpeg', ['hi', 'en']],
]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
choices = [
"ch_sim",
"ch_tra",
"de",
"en",
"es",
"ja",
"hi",
"ru"
]
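# Note: EasyOCR only loads mutually compatible language combinations in a
# single Reader (e.g. 'ch_sim' pairs with 'en' but not with 'ja'), so some
# multi-language selections from this list will fail to initialize.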
gr.Interface(
    inference,
    [
        # gr.inputs.Image(type='file', label='Input Image'),
        gr.inputs.Video(label='Input Video'),
        gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='Language'),
        gr.inputs.Number(label='Time Step (in seconds)', default=1.0)
    ],
    [
        gr.outputs.Video(label='Output Video'),
        # gr.outputs.Dataframe(headers=['Text', 'Confidence'])
    ],
    title=title,
    description=description,
    article=article,
    # examples=examples,
    css=css,
    enable_queue=True
).launch(debug=True)