ysharma (HF staff) committed
Commit a0feee2 · 1 Parent(s): 2e1e454

Final updates

Files changed (1):
  1. app.py +7 -79
app.py CHANGED
@@ -1,8 +1,6 @@
 import gradio as gr
 import moviepy.video.io.ImageSequenceClip
-#image_folder= '/content/gdrive/My Drive/AI/sample_imgs/'
 from PIL import Image
-#import os, sys
 from pydub import AudioSegment
 # Import everything needed to edit video clips
 from moviepy.editor import *
@@ -11,8 +9,6 @@ import mutagen
 from mutagen.mp3 import MP3
 import cv2
 
-#path = "/content/gdrive/My Drive/AI/sample_imgs/"
-#dirs = os.listdir( path )
 
 def resize(img_list):
     print("** inside resize **")
@@ -25,28 +21,6 @@ def resize(img_list):
     print(type(resize_img_list[0]))
     return resize_img_list
 
-#def resize():
-#    for item in dirs:
-#        if os.path.isfile(path+item):
-#            im = Image.open(path+item)
-#            f, e = os.path.splitext(path+item)
-#            imResize = im.resize((256,256), Image.ANTIALIAS)
-#            imResize.save(f + ' resized.jpg', 'JPEG', quality=90)
-
-#resize_img_list = resize(img_list)
-
-
-#image_files = [os.path.join(image_folder,img)
-#               for img in resize_img_list
-#               if img.endswith(".jpg")]
-#print(image_files)
-
-#def images_to_video(fps, resize_img_list):
-#    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(resize_img_list, fps=fps)
-#    return clip
-#clip.write_videofile('/content/gdrive/My Drive/AI/my_vid_20apr.mp4')
-
-#gradio.inputs.Audio(self, source="upload", type="numpy", label=None, optional=False)
 
 def merge_audio_video(entities_num, resize_img_list, text_input):
     print("** inside merge aud vid **")
@@ -68,7 +42,7 @@ def merge_audio_video(entities_num, resize_img_list, text_input):
     audio_length = int(MP3("audio.mp3").info.length)
 
     #Calculate the desired frame per second based on given audio length and entities identified
-    fps= entities_num / audio_length #19 #length of audio file #13 / 19
+    fps= entities_num / audio_length #length of audio file
     fps = float(format(fps, '.5f'))
     print('fps is: ',fps)
 
@@ -88,8 +62,6 @@ def merge_audio_video(entities_num, resize_img_list, text_input):
     # adding audio to the video clip
     mergedclip = videoclip.set_audio(audioclip)
     print('video and audio merged')
-    # showing video clip
-    #videoclip.ipython_display()
 
     #Getting size and frame count of merged video file
     print('Getting size and frame count of merged video file')
@@ -120,7 +92,6 @@ def engine(text_input):
     entities = [tupl for tupl in entities if None not in tupl]
     entities_num = len(entities)
 
-    #img = run(text_input,'50','256','256','1',10) #entities[0][0]
     #Generate images using multimodelart's space for each entity identified above
     img_list = []
     for ent in entities:
@@ -132,65 +103,22 @@ def engine(text_input):
     resize_img_list = resize(img_list)
     print('back from resize')
 
-    #Convert text to speech using facebook's latest model from HF hub
-    #speech = text2speech(text_input)
-    #print('back in engine')
-
-    #getting audio clip's duration
-    #audio_length = int(WAVE(speech).info.length)
-
-    #Calculate the desired frame per second based on given audio length and entities identified
-    #fps= entities_num / audio_length #19 #length of audio file #13 / 19
-    #fps = float(format(fps, '.5f'))
-    #print('fps is: ',fps)
-
-    #Convert string of images into a video
-    #clip = images_to_video(fps, resize_img_list)
-
+
     #Merge video and audio created above
     mergedclip = merge_audio_video(entities_num, resize_img_list, text_input)
     print('Back in engine')
    print(' merged clip type :',type(mergedclip))
     print('Writing the merged video clip to a file')
     mergedclip.to_videofile('mergedvideo.mp4')
-    print('mergedvideo.mp4 created')
-    #{'prompt':text_input,'steps':'50','width':'256','height':'256','images':'1','scale':10}).launch()
-    #img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion", inputs=[gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text"), gr.inputs.Textbox(lines=1, label="Input Text")],
-    #outputs=[gr.outputs.Image(type="pil", label="output image"),gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")], )
-    #title="Convert text to image")
-    #img = img_intfc[0]
-    #img = img_intfc(text_input,'50','256','256','1',10)
-    #print(img)
-    #print(type(img))
-    #print(img)
-    #print(type(img[1][0][0]))
-    #print(img[1])
-    #img = img[0]
-    #inputs=['George',50,256,256,1,10]
-    #run(prompt, steps, width, height, images, scale)
+    print('mergedvideo.mp4 created')
 
+    return 'mergedvideo.mp4'
 
-    return 'mergedvideo.mp4' #img, entities, speech
-
-#image = gr.outputs.Image(type="pil", label="output image")
 app = gr.Interface(engine,
     gr.inputs.Textbox(lines=5, label="Input Text"),
     gr.outputs.Video(type=None, label='Final Merged video'),
-    #[gr.outputs.Image(type="auto", label="Output"), gr.outputs.Textbox(type="auto", label="Text"), gr.outputs.Audio(type="file", label="Speech Answer") ],
-    #live=True,
-    #outputs=[#gr.outputs.Textbox(type="auto", label="Text"),gr.outputs.Audio(type="file", label="Speech Answer"),
-    #outputs= img, #gr.outputs.Carousel(label="Individual images",components=["image"]), #, gr.outputs.Textbox(label="Error")],
-    examples = ['Apple'],
-    description="Takes a text as input and reads it out to you."
-    #examples=["On April 17th Sunday George celebrated Easter. He is staying at Empire State building with his parents. He is a citizen of Canada and speaks English and French fluently. His role model is former president Obama. He got 1000 dollar from his mother to visit Disney World and to buy new iPhone mobile. George likes watching Game of Thrones."]
+    description="Takes a text as input, extracts the entities in it, generate images using multimodalart space for every entity separately. Also, generates speech from input-text using facebook's fastspeech2-en-ljspeech from hub. Creates a video by stringing all the entity-images together. Fuses the AI generated audio and video together to create a coherent movie for you to watch. A fun little app that lets you turn your text to video (well, in some ways atleast :) )" ,
+    examples=["On April 17th Sunday George celebrated Easter. He is staying at Empire State building with his parents. He is a citizen of Canada and speaks English and French fluently. His role model is former president Obama. He got 1000 dollar from his mother to visit Disney World and to buy new iPhone mobile. George likes watching Game of Thrones.", "Apple"]
 ).launch(enable_queue=True, debug=True)
 
-
-#get_audio = gr.Button("generate audio")
-#get_audio.click(text2speech, inputs=text, outputs=speech)
-
-#def greet(name):
-#    return "Hello " + name + "!!"
-
-#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-#iface.launch()
+
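Reviewer note: the only functional edit above is the tidied fps line. The pacing idea is fps = frames / seconds: with one generated image per entity and an L-second narration, N / L frames per second stretches the slideshow across the whole audio track. A minimal sketch of that logic under the file's own assumptions (an audio.mp3 of at least one second, images already resized to one common size; paced_slideshow is a hypothetical name, not in app.py):

from moviepy.editor import AudioFileClip
import moviepy.video.io.ImageSequenceClip
from mutagen.mp3 import MP3

def paced_slideshow(image_paths, audio_path="audio.mp3"):
    # Hypothetical helper, not part of app.py.
    # fps = number of frames / narration length, with the length truncated
    # to whole seconds and fps to 5 decimal places, exactly as app.py does.
    audio_length = int(MP3(audio_path).info.length)
    fps = float(format(len(image_paths) / audio_length, '.5f'))
    # ImageSequenceClip needs every frame at the same size, which is
    # why app.py runs resize() over the generated images first.
    videoclip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(image_paths, fps=fps)
    return videoclip.set_audio(AudioFileClip(audio_path))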
 
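The large comment blocks deleted from engine() documented the image-generation path the function still uses: one image per extracted entity, fetched from the multimodalart/latentdiffusion Space. A rough sketch of that loop, reconstructed only from the deleted hints run(prompt, steps, width, height, images, scale) and img_intfc(text_input,'50','256','256','1',10); the exact call signature and return shape are assumptions:

import gradio as gr

# Load the image-generation Space as a callable interface (legacy
# gradio API, matching the commented-out gr.Interface.load above).
img_intfc = gr.Interface.load("spaces/multimodalart/latentdiffusion")

def images_for_entities(entities):
    # Hypothetical helper: one generated image per (text, label) entity tuple.
    img_list = []
    for ent, _label in entities:
        # Argument order per the deleted hint:
        # run(prompt, steps, width, height, images, scale)
        result = img_intfc(ent, '50', '256', '256', '1', 10)
        img_list.append(result[0])  # assume the first output is the image
    return img_list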
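merge_audio_video() still ends with the "Getting size and frame count of merged video file" debug prints, and cv2 stays imported for that purpose. A hedged sketch of one way to do that check with OpenCV (describe_video is a hypothetical name; app.py prints these values inline):

import os
import cv2

def describe_video(path="mergedvideo.mp4"):
    # Report file size on disk plus the frame count and resolution
    # OpenCV reads back from the merged file.
    cap = cv2.VideoCapture(path)
    frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    print(f"{path}: {os.path.getsize(path)} bytes, {frames} frames at {width}x{height}")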