app.py
CHANGED
@@ -46,27 +46,38 @@ def resize(img_list):
     #clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
 
 
-def merge_audio_video(fps, resize_img_list, speech):
+def merge_audio_video(entities_num, resize_img_list, speech):
     print("** inside merge aud vid **")
-    #String a list of images into a video and write to memory
     print(type(resize_img_list))
     print(type(resize_img_list[0]))
-
-    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
-    clip.write_videofile('my_vid_tmp.mp4')
-
+
     #convert flac to mp3 audio format
     wav_audio = AudioSegment.from_file(speech, "flac") #("/content/gdrive/My Drive/AI/audio1.flac", "flac")
     print('flac audio read', type(wav_audio))
     wav_audio.export("audio.mp3", format="mp3") #("/content/gdrive/My Drive/AI/audio1.mp3", format="mp3")
     print('flac audio converted to mp3 audio' )
+    print('now getting duration of this mp3 audio' )
+    #getting audio clip's duration
+    audio_length = int(WAVE("audio.mp3").info.length)
+
+    #Calculate the desired frame per second based on given audio length and entities identified
+    fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    fps = float(format(fps, '.5f'))
+    print('fps is: ',fps)
 
-    #
+    #String a list of images into a video and write to memory
+    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
+    clip.write_videofile('my_vid_tmp.mp4')
+    print('video clip created from images')
+
+    # loading video file
+    print('Starting video and audio merge')
     videoclip = VideoFileClip('my_vid_tmp.mp4') #("/content/gdrive/My Drive/AI/my_video1.mp4")
-    print('video
+    print('loading video-clip audio')
+
     # loading audio file
     audioclip = AudioFileClip('audio.mp3') #.subclip(0, 15)
-    print('mp3
+    print('loading mp3-format audio')
     # adding audio to the video clip
     mergedclip = videoclip.set_audio(audioclip)
     print('video and audio merged')
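Net effect of this hunk: the frames-per-second computation moves inside merge_audio_video, so the image clip is only written once fps is known, and the slideshow is paced to span the narration exactly. The Space reads the mp3 duration through mutagen's WAVE class; the minimal sketch below substitutes pydub's own length (reported in milliseconds), and the helper name and file names in it are illustrative assumptions, not the Space's code:

import moviepy.video.io.ImageSequenceClip
from moviepy.editor import VideoFileClip, AudioFileClip
from pydub import AudioSegment

def merge_audio_video_sketch(entities_num, resize_img_list, speech_flac):
    # convert the synthesized flac to mp3, as the Space does
    audio = AudioSegment.from_file(speech_flac, "flac")
    audio.export("audio.mp3", format="mp3")
    # pydub reports length in milliseconds; truncate to whole seconds
    audio_length = int(len(audio) / 1000)
    # one image per recognized entity, spread evenly across the audio
    fps = round(entities_num / audio_length, 5)
    # string the resized frames into a silent clip, then attach the audio
    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
    clip.write_videofile('my_vid_tmp.mp4')
    return VideoFileClip('my_vid_tmp.mp4').set_audio(AudioFileClip('audio.mp3'))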
@@ -79,6 +90,7 @@ def merge_audio_video(fps, resize_img_list, speech):
 fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
 
 def text2speech(text):
+    print('inside testtospeech')
     return fastspeech(text)
 
 def engine(text_input):
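For context on the function touched here: gr.Interface.load wraps the hosted FastSpeech2 model as a plain Python callable, and calling it with a string returns a local path to the synthesized audio file (a flac, which is what AudioSegment.from_file(speech, "flac") in the first hunk expects). A hedged usage sketch, with the sample text purely illustrative:

import gradio as gr

# load the hosted inference endpoint as a callable (gradio 2.x-era API)
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

def text2speech(text):
    # returns the path of the generated speech file
    return fastspeech(text)

speech_path = text2speech("Hello from the demo")  # e.g. a temporary .flac path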
@@ -99,23 +111,25 @@ def engine(text_input):
     print('img_list size:',len(img_list))
     #Resizing all images produced to same size
     resize_img_list = resize(img_list)
+    print('back from resize')
 
     #Convert text to speech using facebook's latest model from HF hub
     speech = text2speech(text_input)
+    print('back in engine')
 
     #getting audio clip's duration
-    audio_length = int(WAVE(speech).info.length)
+    #audio_length = int(WAVE(speech).info.length)
 
     #Calculate the desired frame per second based on given audio length and entities identified
-    fps= entities_num / audio_length #19 #length of audio file #13 / 19
-    fps = float(format(fps, '.5f'))
-    print('fps is: ',fps)
+    #fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    #fps = float(format(fps, '.5f'))
+    #print('fps is: ',fps)
 
     #Convert string of images into a video
     #clip = images_to_video(fps, resize_img_list)
 
     #Merge video and audio created above
-    mergedclip = merge_audio_video(fps, resize_img_list, speech)
+    mergedclip = merge_audio_video(entities_num, resize_img_list, speech)
     #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
     #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
     #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
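The call-site change mirrors the first hunk: engine no longer computes fps itself, it now hands the entity count and the raw flac path to merge_audio_video, which derives fps internally. A sketch of the resulting tail of engine, where the helper name and output file name are assumptions for illustration:

def engine_tail_sketch(entities_num, resize_img_list, text_input):
    # synthesize narration; text2speech returns a path to a .flac file
    speech = text2speech(text_input)
    # merge_audio_video now derives fps from entities_num and the audio length
    mergedclip = merge_audio_video(entities_num, resize_img_list, speech)
    mergedclip.write_videofile('mergedvideo.mp4')  # assumed output name
    return 'mergedvideo.mp4'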