ysharma (HF staff) committed
Commit 92e8e59 · 1 Parent(s): 399ecae
Files changed (1):
  1. app.py +28 -14

app.py CHANGED
@@ -46,27 +46,38 @@ def resize(img_list):
     #clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
 
 
-def merge_audio_video(fps, resize_img_list, speech):
+def merge_audio_video(entities_num, resize_img_list, speech):
     print("** inside merge aud vid **")
-    #String a list of images into a video and write to memory
     print(type(resize_img_list))
     print(type(resize_img_list[0]))
-    print(fps)
-    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
-    clip.write_videofile('my_vid_tmp.mp4')
-
+
     #convert flac to mp3 audio format
     wav_audio = AudioSegment.from_file(speech, "flac") #("/content/gdrive/My Drive/AI/audio1.flac", "flac")
     print('flac audio read', type(wav_audio))
     wav_audio.export("audio.mp3", format="mp3") #("/content/gdrive/My Drive/AI/audio1.mp3", format="mp3")
     print('flac audio converted to mp3 audio' )
+    print('now getting duration of this mp3 audio' )
+    #getting audio clip's duration
+    audio_length = int(WAVE("audio.mp3").info.length)
+
+    #Calculate the desired frame per second based on given audio length and entities identified
+    fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    fps = float(format(fps, '.5f'))
+    print('fps is: ',fps)
 
-    # loading video dsa gfg intro video
+    #String a list of images into a video and write to memory
+    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
+    clip.write_videofile('my_vid_tmp.mp4')
+    print('video clip created from images')
+
+    # loading video file
+    print('Starting video and audio merge')
     videoclip = VideoFileClip('my_vid_tmp.mp4') #("/content/gdrive/My Drive/AI/my_video1.mp4")
-    print('video clip loaded in first time')
+    print('loading video-clip audio')
+
     # loading audio file
     audioclip = AudioFileClip('audio.mp3') #.subclip(0, 15)
-    print('mp3 format audio clip loaded in')
+    print('loading mp3-format audio')
     # adding audio to the video clip
     mergedclip = videoclip.set_audio(audioclip)
     print('video and audio merged')
@@ -79,6 +90,7 @@ def merge_audio_video(fps, resize_img_list, speech):
 fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")
 
 def text2speech(text):
+    print('inside testtospeech')
     return fastspeech(text)
 
 def engine(text_input):
@@ -99,23 +111,25 @@ def engine(text_input):
     print('img_list size:',len(img_list))
     #Resizing all images produced to same size
     resize_img_list = resize(img_list)
+    print('back from resize')
 
     #Convert text to speech using facebook's latest model from HF hub
     speech = text2speech(text_input)
+    print('back in engine')
 
     #getting audio clip's duration
-    audio_length = int(WAVE(speech).info.length)
+    #audio_length = int(WAVE(speech).info.length)
 
     #Calculate the desired frame per second based on given audio length and entities identified
-    fps= entities_num / audio_length #19 #length of audio file #13 / 19
-    fps = float(format(fps, '.5f'))
-    print('fps is: ',fps)
+    #fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    #fps = float(format(fps, '.5f'))
+    #print('fps is: ',fps)
 
     #Convert string of images into a video
     #clip = images_to_video(fps, resize_img_list)
 
     #Merge video and audio created above
-    mergedclip = merge_audio_video(fps, resize_img_list, speech)
+    mergedclip = merge_audio_video(entities_num, resize_img_list, speech)
     #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
     #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
     #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
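The substance of the change: merge_audio_video now takes entities_num instead of a precomputed fps and derives fps itself from the duration of the mp3 it has just written, so the image sequence is stretched to exactly cover the audio. A minimal sketch of that timing step (fps_for is a hypothetical helper, not in app.py), with one caveat: mutagen's WAVE class, which the commit calls as WAVE("audio.mp3"), is its WAV parser, so mutagen.mp3.MP3 is shown here as the reader that actually matches an mp3 file.

    # Sketch of the fps derivation merge_audio_video now performs in-line.
    # Assumption: mutagen supplies the duration; mutagen.mp3.MP3 is used
    # here because mutagen.wave.WAVE (as called in the commit) targets WAV files.
    from mutagen.mp3 import MP3

    def fps_for(entities_num, audio_path="audio.mp3"):
        audio_length = int(MP3(audio_path).info.length)  # duration, truncated to whole seconds
        fps = entities_num / audio_length                # one generated image per entity
        return float(format(fps, '.5f'))                 # same 5-decimal rounding as the commit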
 
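After computing fps, the function strings the resized frames into a silent clip and attaches the mp3. A condensed sketch of that assembly path, assuming moviepy 1.x (set_audio, as in the commit; moviepy 2.x renames it with_audio) and pydub; build_merged_clip and its parameters are illustrative names:

    # Condensed sketch of merge_audio_video's assembly path.
    # frames may be image file paths or numpy arrays, the inputs
    # ImageSequenceClip accepts.
    from pydub import AudioSegment
    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
    from moviepy.editor import VideoFileClip, AudioFileClip

    def build_merged_clip(frames, flac_path, fps):
        # FLAC from TTS -> mp3 on disk, as the commit does
        AudioSegment.from_file(flac_path, "flac").export("audio.mp3", format="mp3")
        # silent video at the computed fps
        ImageSequenceClip(frames, fps=fps).write_videofile('my_vid_tmp.mp4')
        # reload the video and attach the audio track
        return VideoFileClip('my_vid_tmp.mp4').set_audio(AudioFileClip('audio.mp3'))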
 
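The only change to text2speech is a trace print (its 'testtospeech' typo is as committed). For context when reading the diff: gr.Interface.load returns a callable wrapper around the hosted model, and the value it returns is treated downstream as a filepath to FLAC audio, which is why merge_audio_video opens speech with AudioSegment.from_file(speech, "flac"). A sketch under that assumption:

    # TTS hop via Gradio's legacy Interface.load API, as app.py uses it.
    # Assumption: the call returns a filepath to the synthesized audio,
    # consistent with how merge_audio_video consumes it.
    import gradio as gr

    fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

    def text2speech(text):
        return fastspeech(text)  # path to a FLAC file from FastSpeech2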
 
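In engine, the old duration/fps block is commented out rather than deleted, and the call site now forwards entities_num so merge_audio_video can do the math once the mp3 exists. One edge case a reviewer might flag: int() truncates the duration, so audio shorter than one second gives audio_length == 0 and the fps division raises ZeroDivisionError. A guarded variant (illustrative only, not in the commit):

    # Hypothetical guard against sub-second audio truncating to zero.
    import math

    def safe_fps(entities_num, duration_seconds):
        audio_length = max(1, math.ceil(duration_seconds))  # never rounds down to zero
        return float(format(entities_num / audio_length, '.5f'))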