ruslanmv committed on
Commit
deebc86
·
verified ·
1 Parent(s): 3ef7fc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -12,7 +12,7 @@ import os
12
  import glob
13
  import subprocess
14
  import imageio_ffmpeg
15
- import os
16
 
17
  # Define a fallback for environments without GPU
18
  if os.environ.get("SPACES_ZERO_GPU") is not None:
@@ -24,6 +24,7 @@ else:
24
  def wrapper(*args, **kwargs):
25
  return func(*args, **kwargs)
26
  return wrapper
 
27
  # Ensure 'punkt' is downloaded for nltk
28
  try:
29
  nltk.data.find('tokenizers/punkt')
@@ -47,7 +48,6 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
47
  model.to(device)
48
  print(f"Using device: {device}")
49
 
50
-
51
  def get_output_video(text):
52
  print("Starting get_output_video function...")
53
  inputs = tokenizer(text,
@@ -67,7 +67,8 @@ def get_output_video(text):
67
  If you have an Ampere architecture GPU you can use torch.bfloat16.
68
  Set the device to either "cuda" or "cpu". Once everything has finished initializing,
69
  float32 is faster than float16 but uses more GPU memory.
70
- '''
 
71
  #@spaces.GPU(duration=60 * 3)
72
  def generate_image(
73
  is_mega: bool,
@@ -85,10 +86,10 @@ def get_output_video(text):
85
  models_root=models_root,
86
  is_reusable=True,
87
  is_verbose=True,
88
- dtype=torch.float16 if fp16 else torch.float32,
89
  device=device
90
  )
91
-
92
  # Ensure correct dtype for inputs
93
  image = model.generate_image(
94
  text,
@@ -99,7 +100,6 @@ def get_output_video(text):
99
  )
100
  print(f"Image generated successfully.")
101
  return image
102
-
103
 
104
  generated_images = []
105
  for i, senten in enumerate(plot[:-1]):
@@ -113,7 +113,8 @@ def get_output_video(text):
113
  top_k=256, # param {type:"integer"}
114
  image_path='generated',
115
  models_root='pretrained',
116
- fp16=True, )
 
117
  generated_images.append(image)
118
  print(f"Image {i+1} generated and appended.")
119
  except Exception as e:
@@ -124,6 +125,7 @@ def get_output_video(text):
124
  sentences = plot[:-1]
125
  num_sentences = len(sentences)
126
  assert len(generated_images) == len(sentences), print('Something is wrong')
 
127
  # We can generate our list of subtitles
128
  from nltk import tokenize
129
  c = 0
@@ -158,7 +160,6 @@ def get_output_video(text):
158
  if not os.path.exists(path_font):
159
  print("Font file not found. Subtitles might not be rendered correctly.")
160
  path_font = None
161
-
162
  if path_font is not None:
163
  try:
164
  font = ImageFont.truetype(path_font, fontsize)
@@ -167,7 +168,7 @@ def get_output_video(text):
167
  draw_multiple_line_text(image, text1, font, text_color, text_start_height)
168
  except Exception as e:
169
  print(f"Error loading or using font: {e}")
170
-
171
  return image
172
 
173
  generated_images_sub = []
@@ -210,7 +211,6 @@ def get_output_video(text):
210
  export_path = 'result.mp3'
211
  silence = AudioSegment.silent(duration=500)
212
  full_audio = AudioSegment.empty()
213
-
214
  for n, mp3_file in enumerate(mp3_names):
215
  mp3_file = mp3_file.replace(chr(92), '/')
216
  print(f"Merging audio file: {mp3_file}")
@@ -259,7 +259,7 @@ def get_output_video(text):
259
 
260
  combine_audio(movie_name, export_path, movie_final) # create a new file
261
  print("Video and audio merged successfully!")
262
-
263
  # Cleanup intermediate files
264
  for f in file_names:
265
  os.remove(f)
@@ -272,9 +272,9 @@ def get_output_video(text):
272
  print("Finished get_output_video function.")
273
  return 'result_final.mp4'
274
 
275
-
276
  text = 'Once, there was a girl called Laura who went to the supermarket to buy the ingredients to make a cake. Because today is her birthday and her friends come to her house and help her to prepare the cake.'
277
  demo = gr.Blocks()
 
278
  with demo:
279
  gr.Markdown("# Video Generator from stories with Artificial Intelligence")
280
  gr.Markdown(
@@ -293,4 +293,5 @@ with demo:
293
  gr.Markdown(
294
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
295
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
 
296
  demo.launch(debug=True)
 
12
  import glob
13
  import subprocess
14
  import imageio_ffmpeg
15
+
16
 
17
  # Define a fallback for environments without GPU
18
  if os.environ.get("SPACES_ZERO_GPU") is not None:
 
24
  def wrapper(*args, **kwargs):
25
  return func(*args, **kwargs)
26
  return wrapper
27
+
28
  # Ensure 'punkt' is downloaded for nltk
29
  try:
30
  nltk.data.find('tokenizers/punkt')
 
48
  model.to(device)
49
  print(f"Using device: {device}")
50
 
 
51
  def get_output_video(text):
52
  print("Starting get_output_video function...")
53
  inputs = tokenizer(text,
 
67
  If you have an Ampere architecture GPU you can use torch.bfloat16.
68
  Set the device to either "cuda" or "cpu". Once everything has finished initializing,
69
  float32 is faster than float16 but uses more GPU memory.
70
+ '''
71
+
72
  #@spaces.GPU(duration=60 * 3)
73
  def generate_image(
74
  is_mega: bool,
 
86
  models_root=models_root,
87
  is_reusable=True,
88
  is_verbose=True,
89
+ dtype=torch.float16 if fp16 else torch.float32, # ensures correct data type
90
  device=device
91
  )
92
+
93
  # Ensure correct dtype for inputs
94
  image = model.generate_image(
95
  text,
 
100
  )
101
  print(f"Image generated successfully.")
102
  return image
 
103
 
104
  generated_images = []
105
  for i, senten in enumerate(plot[:-1]):
 
113
  top_k=256, # param {type:"integer"}
114
  image_path='generated',
115
  models_root='pretrained',
116
+ fp16=True,
117
+ )
118
  generated_images.append(image)
119
  print(f"Image {i+1} generated and appended.")
120
  except Exception as e:
 
125
  sentences = plot[:-1]
126
  num_sentences = len(sentences)
127
  assert len(generated_images) == len(sentences), print('Something is wrong')
128
+
129
  # We can generate our list of subtitles
130
  from nltk import tokenize
131
  c = 0
 
160
  if not os.path.exists(path_font):
161
  print("Font file not found. Subtitles might not be rendered correctly.")
162
  path_font = None
 
163
  if path_font is not None:
164
  try:
165
  font = ImageFont.truetype(path_font, fontsize)
 
168
  draw_multiple_line_text(image, text1, font, text_color, text_start_height)
169
  except Exception as e:
170
  print(f"Error loading or using font: {e}")
171
+
172
  return image
173
 
174
  generated_images_sub = []
 
211
  export_path = 'result.mp3'
212
  silence = AudioSegment.silent(duration=500)
213
  full_audio = AudioSegment.empty()
 
214
  for n, mp3_file in enumerate(mp3_names):
215
  mp3_file = mp3_file.replace(chr(92), '/')
216
  print(f"Merging audio file: {mp3_file}")
 
259
 
260
  combine_audio(movie_name, export_path, movie_final) # create a new file
261
  print("Video and audio merged successfully!")
262
+
263
  # Cleanup intermediate files
264
  for f in file_names:
265
  os.remove(f)
 
272
  print("Finished get_output_video function.")
273
  return 'result_final.mp4'
274
 
 
275
  text = 'Once, there was a girl called Laura who went to the supermarket to buy the ingredients to make a cake. Because today is her birthday and her friends come to her house and help her to prepare the cake.'
276
  demo = gr.Blocks()
277
+
278
  with demo:
279
  gr.Markdown("# Video Generator from stories with Artificial Intelligence")
280
  gr.Markdown(
 
293
  gr.Markdown(
294
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
295
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
296
+
297
  demo.launch(debug=True)