Spaces:
Runtime error
Runtime error
File size: 5,484 Bytes
2ca28b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import time
import streamlit as st
from gradio_client import Client
import yt_dlp
import pytube
from pytube.exceptions import VideoUnavailable
# Module-level Gradio client pointed at the hosted Whisper JAX Space.
# It is constructed once when this module is imported and reused by every
# transcription helper in this file.
client = Client("https://sanchit-gandhi-whisper-jax.hf.space/")
def get_whisper_res_if_the_video_is_youtube_video(video_url, max_recursion_time=20):
    """Request a translated, timestamped transcript for a YouTube URL.

    Each round tries three endpoints of the module-level ``client`` in order
    (``fn_index=6``, ``fn_index=7``, then ``api_name="/predict_2"``) and
    returns the first successful response unchanged. If all three fail, the
    function sleeps for two seconds and starts another round, until more than
    ``max_recursion_time`` seconds have elapsed since the call began.

    Args:
        video_url: Value sent as the 'YouTube URL' Textbox input.
        max_recursion_time: Retry budget in seconds. It is only checked after
            a full round of failed attempts, so at least one round always runs.

    Returns:
        Whatever ``client.predict`` returns on success, or ``None`` when the
        retry budget is exhausted.
    """
    # Keyword argument selecting the endpoint for each attempt, in priority order.
    endpoint_selectors = (
        {"fn_index": 6},
        {"fn_index": 7},
        {"api_name": "/predict_2"},
    )

    # Monotonic clock: the timeout must not be affected by wall-clock changes.
    started_at = time.monotonic()
    while True:
        for selector in endpoint_selectors:
            try:
                return client.predict(
                    video_url,    # str in 'YouTube URL' Textbox component
                    "translate",  # str in 'Task' Radio component
                    True,         # bool in 'Return timestamps' Checkbox component
                    **selector,
                )
            except Exception:
                # Best effort: any failure just means "try the next endpoint".
                # `Exception` (not a bare except) keeps Ctrl-C / SystemExit
                # able to break out of the retry loop.
                continue

        if time.monotonic() - started_at > max_recursion_time:
            return None
        time.sleep(2)
def get_whisper_res_if_the_video_is_not_youtube_video(video_url, max_recursion_time = 20):
    """Request a translated, timestamped transcript for a non-YouTube media URL.

    Each round tries the ``/predict`` endpoint and then ``/predict_1`` on the
    module-level ``client`` and returns the first successful response
    unchanged. If both fail, the function sleeps for two seconds and retries.
    Once more than ``max_recursion_time`` seconds have elapsed, it hands the
    URL to ``get_whisper_res_if_the_video_is_youtube_video`` (with that
    function's default retry budget) as a last resort.

    Args:
        video_url: File path or URL sent as the audio input.
        max_recursion_time: Retry budget in seconds before falling back to the
            YouTube helper. It is only checked after a full failed round.

    Returns:
        Whatever ``client.predict`` returns on success; otherwise the result
        of the YouTube fallback, which may be ``None``.
    """
    # NOTE(review): the original comment labelled the /predict_1 input as the
    # 'YouTube URL' textbox, which looks like a copy-paste; confirm against
    # the Space's API page.
    endpoint_names = ("/predict", "/predict_1")

    # Monotonic clock: the timeout must not be affected by wall-clock changes.
    started_at = time.monotonic()
    while True:
        for endpoint in endpoint_names:
            try:
                return client.predict(
                    video_url,    # str (filepath or URL to file) in 'inputs' Audio component
                    "translate",  # str in 'Task' Radio component
                    True,         # bool in 'Return timestamps' Checkbox component
                    api_name=endpoint,
                )
            except Exception:
                # Best effort: any failure just means "try the next endpoint".
                # `Exception` (not a bare except) keeps Ctrl-C / SystemExit
                # able to break out of the retry loop.
                continue

        if time.monotonic() - started_at > max_recursion_time:
            return get_whisper_res_if_the_video_is_youtube_video(video_url)
        time.sleep(2)
def postprocess_timestamps(result, index):
    """Shorten the timestamp prefix of every transcript line.

    ``result[index]`` is split on newlines. For each line, the start time is
    taken from the text before the first ``' -> '`` (minus its first
    character), converted to integers, and joined back with ``:``. The
    remainder of the line is appended after it.

    The slice offsets assume lines shaped like
    ``[MM:SS.mmm -> MM:SS.mmm] text`` (24-character prefix) or
    ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text`` (30-character prefix).

    Args:
        result: Indexable container whose ``index`` entry is the transcript string.
        index: Position of the transcript string inside ``result``.

    Returns:
        List of rewritten lines, in input order.

    Raises:
        ValueError, IndexError: If a line does not start with a parsable
            timestamp. Callers rely on this to detect a wrong ``index``.
    """
    rewritten = []
    for line in result[index].split('\n'):
        # Drop the leading '[' and break the start time into its fields.
        fields = line.split(' -> ')[0][1:].split(':')

        if len(fields) == 2:
            # Minutes and seconds only.
            minutes = int(fields[0])
            seconds = int(float(fields[1]))
            # The end time may be longer than the start time (e.g. it crosses
            # into hours), so resume at the first space at or after offset 24.
            resume_at = 24 + line[24:].find(' ')
            prefix = ':'.join(map(str, (minutes, seconds)))
            rewritten.append(prefix + line[resume_at:])
            continue

        # Otherwise the start time is expected to carry hours as well.
        hours = int(fields[0])
        minutes = int(fields[1])
        seconds = int(float(fields[2]))
        prefix = ':'.join(map(str, (hours, minutes, seconds)))
        rewritten.append(prefix + line[30:])
    return rewritten
def postprocess_whisper_jax_output(result):
    """Turn a raw Whisper JAX response into one comma-separated transcript string.

    The transcript is looked for at ``result[1]`` first and, if that cannot be
    parsed, at ``result[0]``. Per the original author's note, index 1 is where
    YouTube responses keep the transcript; index 0 is presumably the position
    for non-YouTube responses (confirm against the Space's API).

    Args:
        result: Indexable response as returned by ``client.predict``.

    Returns:
        The rewritten transcript lines joined with ``', '``.

    Raises:
        Exception: Whatever ``postprocess_timestamps`` raises when index 0
            cannot be parsed either.
    """
    try:
        output_list = postprocess_timestamps(result, index=1)
    except Exception:
        # A parse/index failure means the transcript is not at index 1.
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not silently turned into a retry.
        output_list = postprocess_timestamps(result, index=0)
    return ', '.join(output_list)
def get_audio_info(url):
    """Return the metadata yt-dlp extracts for ``url`` without downloading it."""
    with yt_dlp.YoutubeDL() as downloader:
        return downloader.extract_info(url, download=False)
def get_whisper_result(video_url):
    """Transcribe the media behind ``video_url`` and return the formatted text.

    URLs containing ``'youtube.com/watch?v='`` are first checked for
    availability with pytube and then sent to the YouTube helper. Every other
    URL goes to the non-YouTube helper. A non-empty response is passed through
    ``postprocess_whisper_jax_output``.

    Args:
        video_url: Link to a YouTube video or to another media file.

    Returns:
        The formatted transcript string, or ``False`` when the YouTube video
        is unavailable or no transcription result could be obtained.
    """
    if 'youtube.com/watch?v=' in video_url:
        # Keep the try body minimal: only the availability check is expected
        # to raise VideoUnavailable.
        try:
            pytube.YouTube(video_url).check_availability()
        except VideoUnavailable:
            return False
        result = get_whisper_res_if_the_video_is_youtube_video(video_url)
    else:
        result = get_whisper_res_if_the_video_is_not_youtube_video(video_url)

    # The helpers return None when every attempt failed. Report that as False
    # on both paths instead of letting post-processing crash on None.
    if not result:
        return False
    return postprocess_whisper_jax_output(result)
|