Ruslan Magana Vsevolodovna
		
	committed on
		
		
					Commit 
							
							·
						
						a216bdd
	
1
								Parent(s):
							
							3162e54
								
Add application file
Browse files- README.md +2 -1
 - app.py +229 -0
 - demo/tryagain.mp4 +0 -0
 - requirements.txt +8 -0
 - utils.py +37 -0
 
    	
        README.md
    CHANGED
    
    | 
         @@ -1,8 +1,9 @@ 
     | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            title: Youtube Video Translator
         
     | 
| 3 | 
         
            -
            emoji:  
     | 
| 4 | 
         
             
            colorFrom: yellow
         
     | 
| 5 | 
         
             
            colorTo: purple
         
     | 
| 
         | 
|
| 6 | 
         
             
            sdk: gradio
         
     | 
| 7 | 
         
             
            sdk_version: 3.2
         
     | 
| 8 | 
         
             
            app_file: app.py
         
     | 
| 
         | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            title: Youtube Video Translator
         
     | 
| 3 | 
         
            +
            emoji: 🐨
         
     | 
| 4 | 
         
             
            colorFrom: yellow
         
     | 
| 5 | 
         
             
            colorTo: purple
         
     | 
| 6 | 
         
            +
            python_version: 3.8.9
         
     | 
| 7 | 
         
             
            sdk: gradio
         
     | 
| 8 | 
         
             
            sdk_version: 3.2
         
     | 
| 9 | 
         
             
            app_file: app.py
         
     | 
    	
        app.py
    ADDED
    
    | 
         @@ -0,0 +1,229 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            # coding=utf8
         
     | 
| 2 | 
         
            +
            # Youtube Video Translator
         
     | 
| 3 | 
         
            +
            # Developed by Ruslan Magana Vsevolodovna
         
     | 
| 4 | 
         
            +
            # https://ruslanmv.com/
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         
            +
            # importing all necessary libraries
         
     | 
| 7 | 
         
            +
            import pathlib
         
     | 
| 8 | 
         
            +
            import sys, os
         
     | 
| 9 | 
         
            +
            from gtts import gTTS
         
     | 
| 10 | 
         
            +
            import gradio as gr
         
     | 
| 11 | 
         
            +
            import os
         
     | 
| 12 | 
         
            +
            import speech_recognition as sr
         
     | 
| 13 | 
         
            +
            from googletrans import Translator, constants
         
     | 
| 14 | 
         
            +
            from pprint import pprint
         
     | 
| 15 | 
         
            +
            from moviepy.editor import *
         
     | 
| 16 | 
         
            +
            from pytube import YouTube
         
     | 
| 17 | 
         
            +
            from youtube_transcript_api import YouTubeTranscriptApi
         
     | 
| 18 | 
         
            +
            from utils import *
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            def download_video(url):
                """Download the first progressive MP4 stream of a YouTube video.

                Args:
                    url: Address of the YouTube video to fetch.

                Returns:
                    Whatever the stream's ``download()`` call returns; the caller
                    passes it to ``VideoFileClip`` as the local file path.

                Raises:
                    RuntimeError: If the video exposes no progressive MP4 stream.
                """
                print("Downloading...")
                stream = (
                    YouTube(url)
                    .streams.filter(progressive=True, file_extension="mp4")
                    .first()
                )
                # first() yields nothing when the filter matches no stream; fail with
                # a readable message instead of an AttributeError on None.
                if stream is None:
                    raise RuntimeError(
                        "No progressive mp4 stream available for {0}".format(url)
                    )
                local_file = stream.download()
                print("Downloaded")
                return local_file
         
     | 
| 30 | 
         
            +
             
     | 
| 31 | 
         
            +
            def validate_url(url):
                """Check whether *url* is well formed and report the outcome.

                Args:
                    url: The string to check.

                Returns:
                    True when ``validators.url`` accepts the string, False otherwise
                    (a notice is printed in that case).
                """
                # Imported lazily, as in the original code.
                # NOTE(review): "validators" is not listed in the requirements.txt of
                # this commit - confirm it is installed before calling this function.
                import validators

                if validators.url(url):
                    return True
                print("Hi there URL seems invalid ")
                return False
         
     | 
| 35 | 
         
            +
             
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            def cleanup():
                """Delete every ``*.mp4`` and ``*.wav`` entry in the current directory.

                Deletion is best effort: an entry that cannot be removed is reported
                and skipped, so the remaining files are still deleted.
                """
                import pathlib
                import glob

                types = ('*.mp4', '*.wav')  # the tuple of file types
                # Finding mp4 and wave files
                junks = []
                for pattern in types:
                    junks.extend(glob.glob(pattern))
                # Deleting those files one by one; the try block is per file so that
                # a single failure does not abort the rest of the clean-up.
                for junk in junks:
                    print("Deleting",junk)
                    try:
                        pathlib.Path(junk).unlink()
                    except OSError:
                        print("I cannot delete the file because it is being used by another process")
         
     | 
| 55 | 
         
            +
             
     | 
| 56 | 
         
            +
            def getSize(filename):
                """Return the size of *filename* in bytes."""
                return os.path.getsize(filename)
         
     | 
| 59 | 
         
            +
             
     | 
| 60 | 
         
            +
             
     | 
| 61 | 
         
            +
            def generate_transcript(url,lang_api):
                """Fetch the transcript of a YouTube video as one string.

                Args:
                    url: Address of the video. The id is read from the ``v`` query
                        parameter, or from the path of a ``youtu.be`` link.
                    lang_api: Language code passed to the transcript API.

                Returns:
                    The transcript entries joined together, each followed by a
                    single space; entries equal to ``'[Music]'`` are left out.

                Raises:
                    ValueError: If no video id can be located in *url*.
                """
                from urllib.parse import parse_qs, urlparse

                parsed = urlparse(url)
                query_ids = parse_qs(parsed.query).get("v")
                if query_ids:
                    # Only the "v" value is the id; trailing parameters such as
                    # "&list" or "&ab_channel=..." must not leak into it.
                    video_id = query_ids[0]
                elif parsed.netloc.endswith("youtu.be"):
                    video_id = parsed.path.strip("/")
                else:
                    # Previous behaviour kept as a fallback: everything after the
                    # first "=" (str.index raises ValueError when there is none).
                    video_id = url[url.index("=")+1:]
                transcript = YouTubeTranscriptApi.get_transcript(video_id,languages=[lang_api])
                return "".join(
                    entry["text"] + " "
                    for entry in transcript
                    if entry["text"] != '[Music]'
                )
         
     | 
| 70 | 
         
            +
             
     | 
| 71 | 
         
            +
             
     | 
| 72 | 
         
            +
            def video_to_translate(url,initial_language,final_language):
                """Create a copy of a YouTube video dubbed with a spoken translation.

                The video is downloaded into a ``temp`` sub-directory of the current
                working directory. Its text comes from the YouTube transcript when
                one can be fetched, otherwise from speech recognition on the audio
                track. The text is translated, synthesised with gTTS and set as the
                audio of the video, which is written to the starting directory.

                Args:
                    url: Address of the YouTube video.
                    initial_language: Spoken language of the video; one of
                        "English", "Italian", "Spanish", "Russian", "German",
                        "Japanese".
                    final_language: Language to translate into; same choices.

                Returns:
                    The name ``video_translated_<code>.mp4`` of the file written to
                    the directory that was current on entry, or
                    ``"./demo/tryagain.mp4"`` when recognition or translation fails.

                Raises:
                    ValueError: If either language name is not supported.
                """
                # Source language -> (speech-recognition locale, transcript code).
                source_codes = {
                    "English": ("en-US", "en"),
                    "Italian": ("it-IT", "it"),
                    "Spanish": ("es-MX", "es"),
                    "Russian": ("ru-RU", "ru"),  # was 'rus'; the target table uses 'ru'
                    "German": ("de-DE", "de"),
                    "Japanese": ("ja-JP", "ja"),
                }
                # Target language -> code used for translation and speech synthesis.
                target_codes = {
                    "English": "en",
                    "Italian": "it",
                    "Spanish": "es",
                    "Russian": "ru",
                    "German": "de",
                    "Japanese": "ja",
                }
                if initial_language not in source_codes:
                    raise ValueError(
                        "Unsupported initial language: {0!r}".format(initial_language)
                    )
                if final_language not in target_codes:
                    raise ValueError(
                        "Unsupported final language: {0!r}".format(final_language)
                    )
                lang_in, lang_api = source_codes[initial_language]
                lang = target_codes[final_language]

                # Initial directory
                home_dir = os.getcwd()
                print('Initial directory:',home_dir)
                cleanup()
                # Temporal directory
                temp_dir=os.path.join(home_dir, "temp")
                print('Temporal directory:',temp_dir)
                # Create temp directory
                pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
                # Go to temp directory; the finally clause below guarantees that the
                # process returns to home_dir on every exit path, including errors.
                os.chdir(temp_dir)
                try:
                    print('Changing temporal directory',os.getcwd())
                    # Cleaning previous files
                    cleanup()
                    file_obj=download_video(url)
                    print(file_obj)
                    # Insert Local Video File Path
                    videoclip = VideoFileClip(file_obj)
                    try:
                        # Trying to get transcripts
                        text = generate_transcript(url,lang_api)
                        print("Transcript Found")
                    except Exception:
                        print("No Transcript Found")
                        # Trying to recognize audio
                        videoclip.audio.write_audiofile("audio.wav",codec='pcm_s16le')
                        # initialize the recognizer
                        r = sr.Recognizer()
                        print("Recognize from ",lang_in)
                        # Large recordings are split so that each request stays small.
                        if getSize("audio.wav") > 50000000:
                            print("The wav is too large")
                            pieces = split_audio_wav("audio.wav")
                        else:
                            pieces = ["audio.wav"]
                        recognized = []
                        for piece in pieces:
                            print("Converting audio to text",piece)
                            # Load this piece itself; the earlier code re-sent the
                            # audio of the whole file for every chunk.
                            with sr.AudioFile(piece) as source:
                                audio_data = r.record(source)
                            try:
                                recognized.append(
                                    r.recognize_google(audio_data, language = lang_in)
                                )
                            except Exception:
                                print("This video cannot be recognized")
                                # Release the video file before deleting it.
                                videoclip.close()
                                cleanup()
                                return "./demo/tryagain.mp4"
                        text = " ".join(recognized)
                    print("Destination language ",lang)

                    # init the Google API translator
                    translator = Translator()
                    try:
                        translation = translator.translate(text, dest=lang)
                    except Exception:
                        print("This text cannot be translated")
                        videoclip.close()
                        cleanup()
                        return "./demo/tryagain.mp4"
                    trans=translation.text

                    myobj = gTTS(text=trans, lang=lang, slow=False)
                    # NOTE(review): the ".wav" name is kept so that cleanup() removes
                    # the file; confirm the audio format gTTS actually writes.
                    myobj.save("audio.wav")
                    # loading audio file
                    audioclip = AudioFileClip("audio.wav")

                    # adding audio to the video clip
                    videoclip.audio = CompositeAudioClip([audioclip])
                    new_video="video_translated_"+lang+".mp4"

                    # Written by absolute path so the result lands in home_dir while
                    # the relative "audio.wav" above still resolves in temp_dir.
                    videoclip.write_videofile(os.path.join(home_dir, new_video))

                    videoclip.close()
                    audioclip.close()

                    return new_video
                finally:
                    # Return back to main directory
                    os.chdir(home_dir)
                    print('Final directory',os.getcwd())
         
     | 
| 202 | 
         
            +
             
     | 
| 203 | 
         
            +
            # Input widgets of the web form, in the order video_to_translate expects them.
            # NOTE(review): gr.inputs.* looks like the legacy Gradio input namespace -
            # confirm it is still available in the pinned Gradio version.
            initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
            final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
            url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")


            # Build the interface around video_to_translate and start serving it.
            # The description now lists German as a target language, matching the
            # choices of the final_language dropdown.
            gr.Interface(fn = video_to_translate,
                        inputs = [url,initial_language,final_language],
                        outputs = 'video', 
                        verbose = True,
                        title = 'Video Youtube Translator',
                        description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German  to  Italian, Spanish, Russian, German, English and Japanese.  Wait one minute to process.',
                        article = 
                                    '''<div>
                                        <p style="text-align: center"> All you need to do is to paste the Youtube link  and hit submit, then wait for compiling. After that click on Play/Pause for listening to the video. The video is saved in an mp4 format.
                                        For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
                                        </p>
                                    </div>''',

                       examples = [
                                    ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
                                    ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
                                    ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
                                    ["https://www.youtube.com/watch?v=_5YeX8eCLgA&ab_channel=TheTelegraph", "Russian","English"],
                                    ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
                                    ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
                                    ]           
                        ).launch()
         
     | 
    	
        demo/tryagain.mp4
    ADDED
    
    | 
         Binary file (307 kB). View file 
     | 
| 
         | 
    	
        requirements.txt
    ADDED
    
    | 
         @@ -0,0 +1,8 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            pip==22.2.2
         
     | 
| 2 | 
         
            +
            gradio==3.0.24
         
     | 
| 3 | 
         
            +
            googletrans==4.0.0rc1
         
     | 
| 4 | 
         
            +
            moviepy
         
     | 
| 5 | 
         
            +
            SpeechRecognition
         
     | 
| 6 | 
         
            +
            gTTS
         
     | 
| 7 | 
         
            +
            youtube_transcript_api
         
     | 
| 8 | 
         
            +
            pytube
         
     | 
    	
        utils.py
    ADDED
    
    | 
         @@ -0,0 +1,37 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from pydub import AudioSegment
         
     | 
| 2 | 
         
            +
            #from pydub.utils import mediainfo
         
     | 
| 3 | 
         
            +
            from pydub.utils import make_chunks
         
     | 
| 4 | 
         
            +
            import math
         
     | 
| 5 | 
         
            +
            #flac_audio = AudioSegment.from_file("sample.flac", "flac")
         
     | 
| 6 | 
         
            +
            #flac_audio.export("audio.wav", format="wav")
         
     | 
| 7 | 
         
            +
            def split_audio_wav(filename, file_split_size=40000000):
                """Split a WAV file into consecutive chunk files of limited size.

                The chunks are exported to the current working directory as
                ``chunk0.wav``, ``chunk1.wav`` and so on.

                Args:
                    filename: Path of the WAV file to split.
                    file_split_size: Approximate size of one chunk in bytes
                        (default 40,000,000). The chunk duration is rounded up to a
                        whole second, so a chunk can exceed this slightly.

                Returns:
                    The list of exported chunk file names, in playback order. It is
                    empty when the audio has no content.
                """
                myaudio = AudioSegment.from_file(filename , "wav")
                channel_count = myaudio.channels    # Get channels
                # Get sample width
                # NOTE(review): assumed to be bytes per sample (2 for 16-bit audio) -
                # confirm against the pydub documentation.
                sample_width = myaudio.sample_width
                duration_in_sec = len(myaudio) / 1000  # Length of audio in sec
                sample_rate = myaudio.frame_rate
                print("sample_width=", sample_width) 
                print("channel_count=", channel_count)
                print("duration_in_sec=", duration_in_sec) 
                print("frame_rate=", sample_rate)
                # Bytes of audio data per second, from the real sample width rather
                # than an assumed 16-bit depth.
                bytes_per_second = sample_rate * sample_width * channel_count
                wav_file_size = bytes_per_second * duration_in_sec
                print("wav_file_size = ",wav_file_size)
                # Seconds of audio that fit in one chunk. Dividing by the byte rate
                # (not by the total file size) also works for zero-length audio.
                chunk_length_in_sec = math.ceil(file_split_size / bytes_per_second)
                chunk_length_ms = chunk_length_in_sec * 1000
                chunks = make_chunks(myaudio, chunk_length_ms)
                chunks_list=[]
                # Export all of the individual chunks as wav files
                for i, chunk in enumerate(chunks):
                    chunk_name = "chunk{0}.wav".format(i)
                    print("exporting", chunk_name)
                    chunk.export(chunk_name, format="wav")
                    chunks_list.append(chunk_name)
                return chunks_list
         
     |