Spaces:
Runtime error
app.py
CHANGED
@@ -46,27 +46,38 @@ def resize(img_list):
     #clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')


-def merge_audio_video(fps, resize_img_list, speech):
+def merge_audio_video(entities_num, resize_img_list, speech):
     print("** inside merge aud vid **")
-    #String a list of images into a video and write to memory
     print(type(resize_img_list))
     print(type(resize_img_list[0]))
-
-    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
-    clip.write_videofile('my_vid_tmp.mp4')
-
+
     #convert flac to mp3 audio format
     wav_audio = AudioSegment.from_file(speech, "flac") #("/content/gdrive/My Drive/AI/audio1.flac", "flac")
     print('flac audio read', type(wav_audio))
     wav_audio.export("audio.mp3", format="mp3") #("/content/gdrive/My Drive/AI/audio1.mp3", format="mp3")
     print('flac audio converted to mp3 audio' )
+    print('now getting duration of this mp3 audio' )
+    #getting audio clip's duration
+    audio_length = int(WAVE("audio.mp3").info.length)
+
+    #Calculate the desired frame per second based on given audio length and entities identified
+    fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    fps = float(format(fps, '.5f'))
+    print('fps is: ',fps)

-    #
+    #String a list of images into a video and write to memory
+    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
+    clip.write_videofile('my_vid_tmp.mp4')
+    print('video clip created from images')
+
+    # loading video file
+    print('Starting video and audio merge')
     videoclip = VideoFileClip('my_vid_tmp.mp4') #("/content/gdrive/My Drive/AI/my_video1.mp4")
-    print('video
+    print('loading video-clip audio')
+
     # loading audio file
     audioclip = AudioFileClip('audio.mp3') #.subclip(0, 15)
-    print('mp3
+    print('loading mp3-format audio')
     # adding audio to the video clip
     mergedclip = videoclip.set_audio(audioclip)
     print('video and audio merged')
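For context, the rewritten merge_audio_video() now derives fps from the audio duration before building the image sequence, instead of receiving it as a parameter. Below is a minimal runnable sketch of the same flow; it is not the repo's code, and it assumes moviepy 1.x-style imports. One deliberate change: it reads the mp3 duration with mutagen.mp3.MP3 rather than the WAVE class used in the diff, since mutagen's WAVE parser expects a RIFF/WAV file and can fail on an mp3 container, which is a plausible source of the runtime error shown above.

    # Sketch of the new merge_audio_video() flow (assumption-labeled, not the repo's code).
    import moviepy.video.io.ImageSequenceClip
    from moviepy.editor import VideoFileClip, AudioFileClip   # moviepy 1.x layout assumed
    from mutagen.mp3 import MP3                               # swapped in for WAVE: matches the mp3 container
    from pydub import AudioSegment

    def merge_audio_video(entities_num, resize_img_list, speech):
        # convert the FastSpeech2 flac output to mp3 (pydub delegates to ffmpeg)
        AudioSegment.from_file(speech, "flac").export("audio.mp3", format="mp3")

        # duration in whole seconds, read from the mp3 header
        audio_length = int(MP3("audio.mp3").info.length)

        # one image per recognized entity, spread across the narration
        fps = float(format(entities_num / audio_length, '.5f'))

        # string the resized frames (filenames or numpy arrays) into a silent video
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
        clip.write_videofile('my_vid_tmp.mp4')

        # reload the video and attach the narration track
        videoclip = VideoFileClip('my_vid_tmp.mp4')
        return videoclip.set_audio(AudioFileClip('audio.mp3'))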
@@ -79,6 +90,7 @@ def merge_audio_video(fps, resize_img_list, speech):
 fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

 def text2speech(text):
+    print('inside testtospeech')
     return fastspeech(text)

 def engine(text_input):
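The new trace print lands inside text2speech(), which wraps the hosted FastSpeech2 model via gr.Interface.load() (the gradio 3.x-era API this app uses); calling the returned interface runs one remote prediction. A hypothetical smoke test, assuming the model call returns a filepath to flac audio, which is how merge_audio_video() consumes speech:

    # Hypothetical smoke test (not from the repo) for the loaded TTS interface.
    import gradio as gr

    fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

    def text2speech(text):
        # calling the loaded Interface runs a prediction against the hosted model
        return fastspeech(text)

    speech = text2speech("A quick smoke test sentence.")
    print(type(speech), speech)   # expected: a str path to the generated audio file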
@@ -99,23 +111,25 @@ def engine(text_input):
     print('img_list size:',len(img_list))
     #Resizing all images produced to same size
     resize_img_list = resize(img_list)
+    print('back from resize')

     #Convert text to speech using facebook's latest model from HF hub
     speech = text2speech(text_input)
+    print('back in engine')

     #getting audio clip's duration
-    audio_length = int(WAVE(speech).info.length)
+    #audio_length = int(WAVE(speech).info.length)

     #Calculate the desired frame per second based on given audio length and entities identified
-    fps= entities_num / audio_length #19 #length of audio file #13 / 19
-    fps = float(format(fps, '.5f'))
-    print('fps is: ',fps)
+    #fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    #fps = float(format(fps, '.5f'))
+    #print('fps is: ',fps)

     #Convert string of images into a video
     #clip = images_to_video(fps, resize_img_list)

     #Merge video and audio created above
-    mergedclip = merge_audio_video(fps, resize_img_list, speech)
+    mergedclip = merge_audio_video(entities_num, resize_img_list, speech)
     #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
     #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
     #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
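The fps arithmetic commented out here is the same code the first hunk moved into merge_audio_video(). Using the 13-entity / 19-second figures from the inline "#13 / 19" comment (illustrative numbers only), a worked example of what the heuristic produces:

    # Worked example of the fps heuristic with the "#13 / 19" figures.
    entities_num, audio_length = 13, 19

    fps = entities_num / audio_length      # 0.6842105...
    fps = float(format(fps, '.5f'))        # truncated to 5 decimals -> 0.68421
    print('fps is: ', fps)

    # at this rate each entity image stays on screen for ~1/fps seconds
    print(round(1 / fps, 2), 'seconds per image')   # 1.46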