ysharma (HF staff) committed
Commit bbff8a4 · Parent: 92e8e59
Files changed (1)
  1. app.py +9 -5
app.py CHANGED
@@ -46,13 +46,17 @@ def resize(img_list):
   #clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
 
 
-def merge_audio_video(entities_num, resize_img_list, speech):
+def merge_audio_video(entities_num, resize_img_list, text_input):
   print("** inside merge aud vid **")
   print(type(resize_img_list))
   print(type(resize_img_list[0]))
 
-  #convert flac to mp3 audio format
+
+  #Convert text to speech using facebook's latest model from HF hub
+  speech = text2speech(text_input)
+  print(type(speech))
   wav_audio = AudioSegment.from_file(speech, "flac") #("/content/gdrive/My Drive/AI/audio1.flac", "flac")
+  #convert flac to mp3 audio format
   print('flac audio read', type(wav_audio))
   wav_audio.export("audio.mp3", format="mp3") #("/content/gdrive/My Drive/AI/audio1.mp3", format="mp3")
   print('flac audio converted to mp3 audio' )
@@ -114,8 +118,8 @@ def engine(text_input):
   print('back from resize')
 
   #Convert text to speech using facebook's latest model from HF hub
-  speech = text2speech(text_input)
-  print('back in engine')
+  #speech = text2speech(text_input)
+  #print('back in engine')
 
   #getting audio clip's duration
   #audio_length = int(WAVE(speech).info.length)
@@ -129,7 +133,7 @@
   #clip = images_to_video(fps, resize_img_list)
 
   #Merge video and audio created above
-  mergedclip = merge_audio_video(entities_num, resize_img_list, speech)
+  mergedclip = merge_audio_video(entities_num, resize_img_list, text_input)
   #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
   #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
   #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
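
In short, this commit moves the text2speech() call out of engine() and into merge_audio_video(), which now receives the raw text_input instead of a precomputed speech file. A minimal sketch of the resulting flow follows. Only the function signature and the pydub FLAC-to-mp3 conversion come from the diff; text2speech() (assumed to synthesize speech to a FLAC file and return its path) and the moviepy assembly, including the fps pacing rule, are hypothetical stand-ins for parts of app.py this commit does not show.

    # Sketch of the post-commit flow, not the full app.py.
    import numpy as np
    from pydub import AudioSegment                                # FLAC -> mp3, as in the diff
    from moviepy.editor import ImageSequenceClip, AudioFileClip   # assumed merge backend

    def text2speech(text_input):
        # Hypothetical stand-in for the app's HF-hub TTS helper; assumed to
        # write the synthesized speech to a FLAC file and return that path.
        raise NotImplementedError

    def merge_audio_video(entities_num, resize_img_list, text_input):
        # Text-to-speech now happens inside the merge step (moved here from engine())
        speech = text2speech(text_input)

        # Convert the FLAC speech to mp3 (taken directly from the diff)
        wav_audio = AudioSegment.from_file(speech, "flac")
        wav_audio.export("audio.mp3", format="mp3")

        # Assumed assembly: pace one image per entity across the audio's duration,
        # then mux the image sequence with the mp3 track
        audio_clip = AudioFileClip("audio.mp3")
        fps = entities_num / audio_clip.duration
        video_clip = ImageSequenceClip([np.asarray(img) for img in resize_img_list], fps=fps)
        mergedclip = video_clip.set_audio(audio_clip)
        mergedclip.write_videofile("mergedvideo.mp4")
        return mergedclip

The practical effect of the refactor is that engine() no longer has to thread the speech file through to the merge call; merge_audio_video() owns the whole audio path from input text to muxed clip.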