Update app.py
app.py CHANGED
@@ -713,54 +713,29 @@ def generate_map(location_names):
     # return image_1, image_2, image_3


-
-
-# Clear any cached memory
-torch.cuda.empty_cache()
-
-
-import gradio as gr
 import torch
 from diffusers import FluxPipeline

-
-
-
-# Function to initialize Flux bot model
-def initialize_flux_bot():
-    try:
-        torch.cuda.empty_cache()  # Clear GPU memory cache
-        pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16)  # Use FP16
-        pipe.to(device)  # Move the model to the correct device (GPU/CPU)
-    except torch.cuda.OutOfMemoryError:
-        print("CUDA out of memory, switching to CPU.")
-        pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float32)  # Use FP32 for CPU
-        pipe.to("cpu")
+def initialize_flux_pipeline():
+    pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
+    pipe.enable_model_cpu_offload()  # Offload to CPU to save VRAM
     return pipe

-#
-
-
-
+# Initialize the model
+flux_pipe = initialize_flux_pipeline()
+
+
+def generate_flux_image(prompt):
+    # Use the initialized flux_pipe to generate an image based on the input prompt
+    image = flux_pipe(
         prompt,
         guidance_scale=0.0,
-        num_inference_steps=
-        max_sequence_length=
-        generator=torch.Generator(
+        num_inference_steps=4,
+        max_sequence_length=256,
+        generator=torch.Generator("cpu").manual_seed(0)
     ).images[0]
     return image

-# Hardcoded prompts for the images
-hardcoded_prompt_1 = "A high quality cinematic image for Toyota Truck in Birmingham skyline shot in the style of Michael Mann"
-hardcoded_prompt_2 = "A high quality cinematic image for Alabama Quarterback close up emotional shot in the style of Michael Mann"
-hardcoded_prompt_3 = "A high quality cinematic image for Taylor Swift concert in Birmingham skyline style of Michael Mann"
-
-# Function to update images
-def update_images():
-    image_1 = generate_image_flux(hardcoded_prompt_1)
-    image_2 = generate_image_flux(hardcoded_prompt_2)
-    image_3 = generate_image_flux(hardcoded_prompt_3)
-    return image_1, image_2, image_3


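This hunk replaces the FP16/FP32 try-except fallback (and the hardcoded prompt trio) with a single bfloat16 pipeline that uses enable_model_cpu_offload(), so the full model never has to sit in VRAM at once. A minimal sketch of exercising the same calls on their own, assuming torch and diffusers are installed and the FLUX.1-schnell checkpoint can be downloaded; the prompt and output filename are illustrative, not part of the commit:

import torch
from diffusers import FluxPipeline

# Same initialization as the committed initialize_flux_pipeline():
# bfloat16 weights with CPU offload to save VRAM.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()

# Same call as the committed generate_flux_image(); the prompt and
# filename below are illustrative only.
image = pipe(
    "A watercolor lighthouse at dusk",
    guidance_scale=0.0,
    num_inference_steps=4,
    max_sequence_length=256,
    generator=torch.Generator("cpu").manual_seed(0),
).images[0]
image.save("flux_sample.png")  # PIL.Image returned by the pipeline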
@@ -949,76 +924,76 @@ def generate_audio_elevenlabs(text):

 # chunking audio and then Process

-
-
-
-
-
-
-
-
-
-
-
-# # Ensure your device is set to CUDA
-# device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
-# repo_id = "parler-tts/parler-tts-mini-v1"
-
-# def generate_audio_parler_tts(text):
-#     description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
-#     chunk_size_in_s = 0.5
-
-#     # Initialize the tokenizer and model
-#     parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
-#     parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
-#     sampling_rate = parler_model.audio_encoder.config.sampling_rate
-#     frame_rate = parler_model.audio_encoder.config.frame_rate
-
-#     def generate(text, description, play_steps_in_s=0.5):
-#         play_steps = int(frame_rate * play_steps_in_s)
-#         streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
-# … (remaining commented-out lines truncated in the page capture)
+import concurrent.futures
+import tempfile
+import os
+import numpy as np
+import logging
+from queue import Queue
+from threading import Thread
+from scipy.io.wavfile import write as write_wav
+from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
+from transformers import AutoTokenizer

+# Ensure your device is set to CUDA
+device = "cuda:0" if torch.cuda.is_available() else "cpu"

+repo_id = "parler-tts/parler-tts-mini-v1"
+
+def generate_audio_parler_tts(text):
+    description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
+    chunk_size_in_s = 0.5
+
+    # Initialize the tokenizer and model
+    parler_tokenizer = AutoTokenizer.from_pretrained(repo_id)
+    parler_model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
+    sampling_rate = parler_model.audio_encoder.config.sampling_rate
+    frame_rate = parler_model.audio_encoder.config.frame_rate
+
+    def generate(text, description, play_steps_in_s=0.5):
+        play_steps = int(frame_rate * play_steps_in_s)
+        streamer = ParlerTTSStreamer(parler_model, device=device, play_steps=play_steps)
+
+        inputs = parler_tokenizer(description, return_tensors="pt").to(device)
+        prompt = parler_tokenizer(text, return_tensors="pt").to(device)
+
+        generation_kwargs = dict(
+            input_ids=inputs.input_ids,
+            prompt_input_ids=prompt.input_ids,
+            attention_mask=inputs.attention_mask,
+            prompt_attention_mask=prompt.attention_mask,
+            streamer=streamer,
+            do_sample=True,
+            temperature=1.0,
+            min_new_tokens=10,
+        )

+        thread = Thread(target=parler_model.generate, kwargs=generation_kwargs)
+        thread.start()

+        for new_audio in streamer:
+            if new_audio.shape[0] == 0:
+                break
+            # Save or process each audio chunk as it is generated
+            yield sampling_rate, new_audio

+    audio_segments = []
+    for (sampling_rate, audio_chunk) in generate(text, description, chunk_size_in_s):
+        audio_segments.append(audio_chunk)

+        temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_chunk_{len(audio_segments)}.wav")
+        write_wav(temp_audio_path, sampling_rate, audio_chunk.astype(np.float32))
+        logging.debug(f"Saved chunk to {temp_audio_path}")


+    # Combine all the audio chunks into one audio file
+    combined_audio = np.concatenate(audio_segments)
+    combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio_stream.wav")

+    write_wav(combined_audio_path, sampling_rate, combined_audio.astype(np.float32))

+    logging.debug(f"Combined audio saved to {combined_audio_path}")
+    return combined_audio_path


 def fetch_local_events():
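This hunk re-enables the previously commented-out Parler-TTS path: the nested generate() helper runs parler_model.generate on a background thread and yields (sampling_rate, chunk) pairs from ParlerTTSStreamer as they arrive, while the outer loop writes each chunk to a temp WAV and finally concatenates them into one file. Note that the tokenizer and model are re-loaded on every call; hoisting them to module scope, as repo_id already is, would avoid repeated loads. A minimal sketch of calling the new function, assuming parler_tts, transformers, scipy, and torch are installed and generate_audio_parler_tts is in scope as defined above; the sample text is illustrative:

import logging

logging.basicConfig(level=logging.DEBUG)  # surface the per-chunk debug logs

# Returns the path of the combined WAV written to the temp directory.
wav_path = generate_audio_parler_tts("Welcome to Birmingham, Alabama!")
print("Combined Parler-TTS audio written to", wav_path)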
@@ -1503,15 +1478,19 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         news_output = gr.HTML(value=fetch_local_news())
         events_output = gr.HTML(value=fetch_local_events())

+
         with gr.Column():
-
-
-
-
-
-
-
-
+
+            flux_prompt = gr.Textbox(show_copy_button=True, label="Flux Prompt", placeholder="Enter prompt for Flux image generation")
+            flux_image_output = gr.Image()
+            flux_generate_button = gr.Button("Generate Flux Image")
+
+            # When the button is clicked, the image generation function is triggered
+            flux_generate_button.click(fn=generate_flux_image, inputs=flux_prompt, outputs=flux_image_output)
+
+
+
+

             # Refresh button to update images
             refresh_button = gr.Button("Refresh Images")
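The UI hunk fills the empty placeholder lines inside with gr.Column(): with the new Flux controls and wires them through the standard Gradio Button.click pattern. A self-contained sketch of the same wiring, assuming gradio and Pillow are installed; the stub generator below stands in for the real pipeline so the snippet runs without downloading the model (the stub and its placeholder image are illustrative only):

import gradio as gr
from PIL import Image

def generate_flux_image(prompt):           # stub standing in for the real pipeline
    return Image.new("RGB", (256, 256))    # placeholder image

with gr.Blocks() as demo:
    with gr.Column():
        flux_prompt = gr.Textbox(show_copy_button=True, label="Flux Prompt",
                                 placeholder="Enter prompt for Flux image generation")
        flux_image_output = gr.Image()
        flux_generate_button = gr.Button("Generate Flux Image")
        # Clicking the button routes the textbox value through the generator
        flux_generate_button.click(fn=generate_flux_image,
                                   inputs=flux_prompt,
                                   outputs=flux_image_output)

demo.launch()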