ruslanmv committed on
Commit
deebc86
·
verified ·
1 Parent(s): 3ef7fc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -12,7 +12,7 @@ import os
12
  import glob
13
  import subprocess
14
  import imageio_ffmpeg
15
- import os
16
 
17
  # Define a fallback for environments without GPU
18
  if os.environ.get("SPACES_ZERO_GPU") is not None:
@@ -24,6 +24,7 @@ else:
24
  def wrapper(*args, **kwargs):
25
  return func(*args, **kwargs)
26
  return wrapper
 
27
  # Ensure 'punkt' is downloaded for nltk
28
  try:
29
  nltk.data.find('tokenizers/punkt')
@@ -47,7 +48,6 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
47
  model.to(device)
48
  print(f"Using device: {device}")
49
 
50
-
51
  def get_output_video(text):
52
  print("Starting get_output_video function...")
53
  inputs = tokenizer(text,
@@ -67,7 +67,8 @@ def get_output_video(text):
67
  If you have an Ampere architecture GPU you can use torch.bfloat16.
68
  Set the device to either "cuda" or "cpu". Once everything has finished initializing,
69
  float32 is faster than float16 but uses more GPU memory.
70
- '''
 
71
  #@spaces.GPU(duration=60 * 3)
72
  def generate_image(
73
  is_mega: bool,
@@ -85,10 +86,10 @@ def get_output_video(text):
85
  models_root=models_root,
86
  is_reusable=True,
87
  is_verbose=True,
88
- dtype=torch.float16 if fp16 else torch.float32,
89
  device=device
90
  )
91
-
92
  # Ensure correct dtype for inputs
93
  image = model.generate_image(
94
  text,
@@ -99,7 +100,6 @@ def get_output_video(text):
99
  )
100
  print(f"Image generated successfully.")
101
  return image
102
-
103
 
104
  generated_images = []
105
  for i, senten in enumerate(plot[:-1]):
@@ -113,7 +113,8 @@ def get_output_video(text):
113
  top_k=256, # param {type:"integer"}
114
  image_path='generated',
115
  models_root='pretrained',
116
- fp16=True, )
 
117
  generated_images.append(image)
118
  print(f"Image {i+1} generated and appended.")
119
  except Exception as e:
@@ -124,6 +125,7 @@ def get_output_video(text):
124
  sentences = plot[:-1]
125
  num_sentences = len(sentences)
126
  assert len(generated_images) == len(sentences), print('Something is wrong')
 
127
  # We can generate our list of subtitles
128
  from nltk import tokenize
129
  c = 0
@@ -158,7 +160,6 @@ def get_output_video(text):
158
  if not os.path.exists(path_font):
159
  print("Font file not found. Subtitles might not be rendered correctly.")
160
  path_font = None
161
-
162
  if path_font is not None:
163
  try:
164
  font = ImageFont.truetype(path_font, fontsize)
@@ -167,7 +168,7 @@ def get_output_video(text):
167
  draw_multiple_line_text(image, text1, font, text_color, text_start_height)
168
  except Exception as e:
169
  print(f"Error loading or using font: {e}")
170
-
171
  return image
172
 
173
  generated_images_sub = []
@@ -210,7 +211,6 @@ def get_output_video(text):
210
  export_path = 'result.mp3'
211
  silence = AudioSegment.silent(duration=500)
212
  full_audio = AudioSegment.empty()
213
-
214
  for n, mp3_file in enumerate(mp3_names):
215
  mp3_file = mp3_file.replace(chr(92), '/')
216
  print(f"Merging audio file: {mp3_file}")
@@ -259,7 +259,7 @@ def get_output_video(text):
259
 
260
  combine_audio(movie_name, export_path, movie_final) # create a new file
261
  print("Video and audio merged successfully!")
262
-
263
  # Cleanup intermediate files
264
  for f in file_names:
265
  os.remove(f)
@@ -272,9 +272,9 @@ def get_output_video(text):
272
  print("Finished get_output_video function.")
273
  return 'result_final.mp4'
274
 
275
-
276
  text = 'Once, there was a girl called Laura who went to the supermarket to buy the ingredients to make a cake. Because today is her birthday and her friends come to her house and help her to prepare the cake.'
277
  demo = gr.Blocks()
 
278
  with demo:
279
  gr.Markdown("# Video Generator from stories with Artificial Intelligence")
280
  gr.Markdown(
@@ -293,4 +293,5 @@ with demo:
293
  gr.Markdown(
294
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
295
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
 
296
  demo.launch(debug=True)
 
12
  import glob
13
  import subprocess
14
  import imageio_ffmpeg
15
+
16
 
17
  # Define a fallback for environments without GPU
18
  if os.environ.get("SPACES_ZERO_GPU") is not None:
 
24
  def wrapper(*args, **kwargs):
25
  return func(*args, **kwargs)
26
  return wrapper
27
+
28
  # Ensure 'punkt' is downloaded for nltk
29
  try:
30
  nltk.data.find('tokenizers/punkt')
 
48
  model.to(device)
49
  print(f"Using device: {device}")
50
 
 
51
  def get_output_video(text):
52
  print("Starting get_output_video function...")
53
  inputs = tokenizer(text,
 
67
  If you have an Ampere architecture GPU you can use torch.bfloat16.
68
  Set the device to either "cuda" or "cpu". Once everything has finished initializing,
69
  float32 is faster than float16 but uses more GPU memory.
70
+ '''
71
+
72
  #@spaces.GPU(duration=60 * 3)
73
  def generate_image(
74
  is_mega: bool,
 
86
  models_root=models_root,
87
  is_reusable=True,
88
  is_verbose=True,
89
+ dtype=torch.float16 if fp16 else torch.float32, # ensures correct data type
90
  device=device
91
  )
92
+
93
  # Ensure correct dtype for inputs
94
  image = model.generate_image(
95
  text,
 
100
  )
101
  print(f"Image generated successfully.")
102
  return image
 
103
 
104
  generated_images = []
105
  for i, senten in enumerate(plot[:-1]):
 
113
  top_k=256, # param {type:"integer"}
114
  image_path='generated',
115
  models_root='pretrained',
116
+ fp16=True,
117
+ )
118
  generated_images.append(image)
119
  print(f"Image {i+1} generated and appended.")
120
  except Exception as e:
 
125
  sentences = plot[:-1]
126
  num_sentences = len(sentences)
127
  assert len(generated_images) == len(sentences), print('Something is wrong')
128
+
129
  # We can generate our list of subtitles
130
  from nltk import tokenize
131
  c = 0
 
160
  if not os.path.exists(path_font):
161
  print("Font file not found. Subtitles might not be rendered correctly.")
162
  path_font = None
 
163
  if path_font is not None:
164
  try:
165
  font = ImageFont.truetype(path_font, fontsize)
 
168
  draw_multiple_line_text(image, text1, font, text_color, text_start_height)
169
  except Exception as e:
170
  print(f"Error loading or using font: {e}")
171
+
172
  return image
173
 
174
  generated_images_sub = []
 
211
  export_path = 'result.mp3'
212
  silence = AudioSegment.silent(duration=500)
213
  full_audio = AudioSegment.empty()
 
214
  for n, mp3_file in enumerate(mp3_names):
215
  mp3_file = mp3_file.replace(chr(92), '/')
216
  print(f"Merging audio file: {mp3_file}")
 
259
 
260
  combine_audio(movie_name, export_path, movie_final) # create a new file
261
  print("Video and audio merged successfully!")
262
+
263
  # Cleanup intermediate files
264
  for f in file_names:
265
  os.remove(f)
 
272
  print("Finished get_output_video function.")
273
  return 'result_final.mp4'
274
 
 
275
  text = 'Once, there was a girl called Laura who went to the supermarket to buy the ingredients to make a cake. Because today is her birthday and her friends come to her house and help her to prepare the cake.'
276
  demo = gr.Blocks()
277
+
278
  with demo:
279
  gr.Markdown("# Video Generator from stories with Artificial Intelligence")
280
  gr.Markdown(
 
293
  gr.Markdown(
294
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
295
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
296
+
297
  demo.launch(debug=True)