Ruslan Magana Vsevolodovna committed on
Commit
e4b9e92
·
1 Parent(s): 82fe417

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -25
app.py CHANGED
@@ -77,16 +77,104 @@ def getSize(filename):
77
  return st.st_size
78
 
79
 
80
def generate_transcript(url, lang_api):
    """Return the transcript of a YouTube video as one space-separated string.

    Args:
        url: Video URL. The id is read from the ``v`` query parameter; if the
            URL has no such parameter, everything after the first ``=`` is
            used instead.
        lang_api: Language code passed to
            ``YouTubeTranscriptApi.get_transcript`` as the only entry of
            ``languages``.

    Returns:
        The ``"text"`` of every transcript entry, each followed by a single
        space, with entries equal to ``'[Music]'`` left out.

    Raises:
        ValueError: If the URL has neither a ``v`` parameter nor any ``=``.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises is
            propagated unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    # Slicing after the first "=" keeps trailing parameters such as "&list"
    # inside the id, so read the ``v`` parameter explicitly when it exists.
    video_ids = parse_qs(urlparse(url).query).get("v")
    video_id = video_ids[0] if video_ids else url[url.index("=") + 1:]

    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])

    # Each kept entry contributes "<text> ", so the result ends with a space.
    return "".join(
        entry["text"] + " "
        for entry in transcript
        if entry["text"] != '[Music]'
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Set environment variables
92
  home_dir = os.getcwd()
@@ -141,11 +229,11 @@ def video_to_translate(url,initial_language,final_language):
141
  print(file_obj)
142
  # Insert Local Video File Path
143
  videoclip = VideoFileClip(file_obj)
144
- try:
145
- # Trying to get transcripts
146
- text = generate_transcript(url,lang_api)
147
- print("Transcript Found")
148
- except Exception:
149
  print("No Transcript Found")
150
  # Trying to recognize audio
151
  # Insert Local Audio File Path
@@ -185,21 +273,21 @@ def video_to_translate(url,initial_language,final_language):
185
  return "./demo/tryagain.mp4"
186
 
187
  #print(text)
188
- print("Destination language ",lang)
189
 
190
- # init the Google API translator
191
- translator = Translator()
192
 
193
 
194
- try:
195
- translation = translator.translate(text, dest=lang)
196
- except Exception:
197
- print("This text cannot be translated")
198
- cleanup()
199
- return "./demo/tryagain.mp4"
200
-
201
- #translation.text
202
- trans=translation.text
203
 
204
  myobj = gTTS(text=trans, lang=lang, slow=False)
205
  myobj.save("audio.wav")
@@ -242,10 +330,10 @@ gr.Interface(fn = video_to_translate,
242
 
243
  examples = [
244
  ["https://www.youtube.com/watch?v=uLVRZE8OAI4", "English","Spanish"],
245
- ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
246
  ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
247
  ["https://www.youtube.com/watch?v=aDGY4ezFR_0", "Italian","English"],
248
- ["https://www.youtube.com/watch?v=QbkhvLrlex4", "Russian","English"],
249
  ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
250
  ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German","Spanish"]
251
 
 
77
  return st.st_size
78
 
79
 
80
def clean_transcript(transcript_list):
    """Join transcript entries into one string, dropping music placeholders.

    Args:
        transcript_list: Iterable of mappings that each provide a ``"text"``
            entry.

    Returns:
        The ``"text"`` of every kept entry, each followed by a single space
        (so a non-empty result ends with a space). An empty input yields
        ``""``.

    An entry is dropped when its text, ignoring case and surrounding
    whitespace, is one of the bracketed "music" markers listed below.
    """
    # Case-folded markers: one lookup covers "[Music]", "[MUSIC]", "[Музыка]", ...
    music_markers = frozenset({
        "[music]",
        "[музыка]",
        "[musik]",
        "[musica]",
        "[música]",
        "[音楽]",
        "[音乐]",
    })
    return "".join(
        entry["text"] + " "
        for entry in transcript_list
        if entry["text"].strip().casefold() not in music_markers
    )
99
+
100
+
101
def get_transcript(url, desired_language):
    """Fetch a video's transcript and, when offered, its direct translation.

    Selection order among the transcripts listed for the video:
      1. the first one that is manually created and translatable,
      2. otherwise the first one that is auto-generated and translatable,
      3. otherwise the first transcript listed.

    Args:
        url: Video URL. The id is read from the ``v`` query parameter; if the
            URL has no such parameter, everything after the first ``=`` is
            used (an empty id when there is no ``=`` at all).
        desired_language: Language code to look for among the selected
            transcript's ``translation_languages``.

    Returns:
        A 3-tuple ``(script, script_translated, is_translated)``:
        ``script`` is the cleaned text of the selected transcript,
        ``script_translated`` is the cleaned translated text or ``""``, and
        ``is_translated`` says whether a translation was produced. When the
        transcripts cannot be listed, or none are listed,
        ``(" ", "", False)`` is returned.

    Raises:
        Exception: Errors raised while fetching or translating the selected
            transcript are propagated unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    # Slicing after the first "=" keeps trailing parameters such as "&list"
    # inside the id, so read the ``v`` parameter explicitly when it exists.
    video_ids = parse_qs(urlparse(url).query).get("v")
    id_you = video_ids[0] if video_ids else url.partition("=")[2]

    try:
        # retrieve the available transcripts
        transcripts = list(YouTubeTranscriptApi.list_transcripts(id_you))
    except Exception:
        print('TranscriptsDisabled:')
        # Always three values: the caller unpacks (text, trans, is_traduc).
        return " ", "", False

    print("There are {} avialable scripts".format(len(transcripts)))
    if not transcripts:
        return " ", "", False

    # Remember the first translatable transcript of each kind.
    manual = None
    automatic = None
    for candidate in transcripts:
        if not candidate.is_translatable:
            continue
        if candidate.is_generated:
            print("Script found automatic generated")
            if automatic is None:
                automatic = candidate
        else:
            print("Script found manually generated")
            if manual is None:
                manual = candidate

    if manual is not None:
        print('We extract manually created transcripts')
        transcript = manual
    elif automatic is not None:
        print('We extract generated transcript')
        transcript = automatic
    else:
        print('We try find the transcript')
        transcript = transcripts[0]

    script_translated = ""
    is_translated = False
    # Consult the translation targets of the transcript actually selected.
    # NOTE(review): entries are subscripted with 'language_code', as the
    # previous code did -- confirm against the installed library version.
    if transcript.is_translatable and any(
        option['language_code'] == desired_language
        for option in transcript.translation_languages
    ):
        print("It was found the translation for lang:", desired_language)
        print('We translate directly the transcript')
        script_translated = clean_transcript(
            transcript.translate(desired_language).fetch()
        )
        is_translated = True

    script = clean_transcript(transcript.fetch())

    return script, script_translated, is_translated
178
 
179
  # Set environment variables
180
  home_dir = os.getcwd()
 
229
  print(file_obj)
230
  # Insert Local Video File Path
231
  videoclip = VideoFileClip(file_obj)
232
+ is_traduc=False
233
+ # Trying to get transcripts
234
+ text, trans, is_traduc = get_transcript(url,desired_language=lang)
235
+ print("Transcript Found")
236
+ if not is_traduc:
237
  print("No Transcript Found")
238
  # Trying to recognize audio
239
  # Insert Local Audio File Path
 
273
  return "./demo/tryagain.mp4"
274
 
275
  #print(text)
276
+ print("Destination language ",lang)
277
 
278
+ # init the Google API translator
279
+ translator = Translator()
280
 
281
 
282
+ try:
283
+ translation = translator.translate(text, dest=lang)
284
+ except Exception:
285
+ print("This text cannot be translated")
286
+ cleanup()
287
+ return "./demo/tryagain.mp4"
288
+
289
+ #translation.text
290
+ trans=translation.text
291
 
292
  myobj = gTTS(text=trans, lang=lang, slow=False)
293
  myobj.save("audio.wav")
 
330
 
331
  examples = [
332
  ["https://www.youtube.com/watch?v=uLVRZE8OAI4", "English","Spanish"],
333
+ ["https://www.youtube.com/watch?v=s5XvjAC7ai8", "Russian","Italian"],
334
  ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
335
  ["https://www.youtube.com/watch?v=aDGY4ezFR_0", "Italian","English"],
336
+ ["https://www.youtube.com/watch?v=s5XvjAC7ai8", "Russian","English"],
337
  ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
338
  ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German","Spanish"]
339