Pijush2023 committed on
Commit
13eb1f5
·
verified ·
1 Parent(s): 564dc92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -57
app.py CHANGED
@@ -417,8 +417,8 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
417
  audio_future = executor.submit(generate_audio_elevenlabs, response)
418
  elif tts_choice == "Beta":
419
  audio_future = executor.submit(generate_audio_parler_tts, response)
420
- elif tts_choice == "Gamma":
421
- audio_future = executor.submit(generate_audio_mars5, response)
422
 
423
  for character in response:
424
  history[-1][1] += character
@@ -750,64 +750,64 @@ def generate_audio_parler_tts(text):
750
 
751
 
752
 
753
- # Load the MARS5 model
754
- mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)
755
 
756
- def generate_audio_mars5(text):
757
- description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
758
- kwargs_dict = {
759
- 'temperature': 0.2,
760
- 'top_k': -1,
761
- 'top_p': 0.2,
762
- 'typical_p': 1.0,
763
- 'freq_penalty': 2.6,
764
- 'presence_penalty': 0.4,
765
- 'rep_penalty_window': 100,
766
- 'max_prompt_phones': 360,
767
- 'deep_clone': True,
768
- 'nar_guidance_w': 3
769
- }
770
 
771
- chunks = chunk_text(preprocess(text))
772
- audio_segments = []
773
 
774
- for chunk in chunks:
775
- wav = torch.zeros(1, mars5.sr) # Use a placeholder silent audio for the reference
776
- cfg = config_class(**{k: kwargs_dict[k] for k in kwargs_dict if k in config_class.__dataclass_fields__})
777
- ar_codes, wav_out = mars5.tts(chunk, wav, "", cfg=cfg)
778
 
779
- temp_audio_path = os.path.join(tempfile.gettempdir(), f"mars5_audio_{len(audio_segments)}.wav")
780
- torchaudio.save(temp_audio_path, wav_out.unsqueeze(0), mars5.sr)
781
- audio_segments.append(AudioSegment.from_wav(temp_audio_path))
782
 
783
- combined_audio = sum(audio_segments)
784
- combined_audio_path = os.path.join(tempfile.gettempdir(), "mars5_combined_audio.wav")
785
- combined_audio.export(combined_audio_path, format="wav")
786
 
787
- logging.debug(f"Audio saved to {combined_audio_path}")
788
- return combined_audio_path
789
 
790
- pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
791
- pipe.to(device)
792
 
793
- def generate_image(prompt):
794
- with torch.cuda.amp.autocast():
795
- image = pipe(
796
- prompt,
797
- num_inference_steps=28,
798
- guidance_scale=3.0,
799
- ).images[0]
800
- return image
801
 
802
- hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
803
- hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
804
- hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
805
 
806
- def update_images():
807
- image_1 = generate_image(hardcoded_prompt_1)
808
- image_2 = generate_image(hardcoded_prompt_2)
809
- image_3 = generate_image(hardcoded_prompt_3)
810
- return image_1, image_2, image_3
811
 
812
 
813
 
@@ -1237,7 +1237,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1237
  gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
1238
 
1239
  chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!", placeholder="Hey Radar...!!")
1240
- tts_choice = gr.Radio(label="Select TTS System", choices=["Alpha", "Beta", "Gamma"], value="Alpha")
1241
  retriever_button = gr.Button("Retriever")
1242
 
1243
  clear_button = gr.Button("Clear")
@@ -1278,13 +1278,13 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1278
  news_output = gr.HTML(value=fetch_local_news())
1279
  events_output = gr.HTML(value=fetch_local_events())
1280
 
1281
- with gr.Column():
1282
- image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
1283
- image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
1284
- image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
1285
 
1286
- refresh_button = gr.Button("Refresh Images")
1287
- refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
1288
 
1289
  demo.queue()
1290
  demo.launch(share=True)
 
417
  audio_future = executor.submit(generate_audio_elevenlabs, response)
418
  elif tts_choice == "Beta":
419
  audio_future = executor.submit(generate_audio_parler_tts, response)
420
+ # elif tts_choice == "Gamma":
421
+ # audio_future = executor.submit(generate_audio_mars5, response)
422
 
423
  for character in response:
424
  history[-1][1] += character
 
750
 
751
 
752
 
753
+ # # Load the MARS5 model
754
+ # mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)
755
 
756
+ # def generate_audio_mars5(text):
757
+ # description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
758
+ # kwargs_dict = {
759
+ # 'temperature': 0.2,
760
+ # 'top_k': -1,
761
+ # 'top_p': 0.2,
762
+ # 'typical_p': 1.0,
763
+ # 'freq_penalty': 2.6,
764
+ # 'presence_penalty': 0.4,
765
+ # 'rep_penalty_window': 100,
766
+ # 'max_prompt_phones': 360,
767
+ # 'deep_clone': True,
768
+ # 'nar_guidance_w': 3
769
+ # }
770
 
771
+ # chunks = chunk_text(preprocess(text))
772
+ # audio_segments = []
773
 
774
+ # for chunk in chunks:
775
+ # wav = torch.zeros(1, mars5.sr) # Use a placeholder silent audio for the reference
776
+ # cfg = config_class(**{k: kwargs_dict[k] for k in kwargs_dict if k in config_class.__dataclass_fields__})
777
+ # ar_codes, wav_out = mars5.tts(chunk, wav, "", cfg=cfg)
778
 
779
+ # temp_audio_path = os.path.join(tempfile.gettempdir(), f"mars5_audio_{len(audio_segments)}.wav")
780
+ # torchaudio.save(temp_audio_path, wav_out.unsqueeze(0), mars5.sr)
781
+ # audio_segments.append(AudioSegment.from_wav(temp_audio_path))
782
 
783
+ # combined_audio = sum(audio_segments)
784
+ # combined_audio_path = os.path.join(tempfile.gettempdir(), "mars5_combined_audio.wav")
785
+ # combined_audio.export(combined_audio_path, format="wav")
786
 
787
+ # logging.debug(f"Audio saved to {combined_audio_path}")
788
+ # return combined_audio_path
789
 
790
+ # pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
791
+ # pipe.to(device)
792
 
793
+ # def generate_image(prompt):
794
+ # with torch.cuda.amp.autocast():
795
+ # image = pipe(
796
+ # prompt,
797
+ # num_inference_steps=28,
798
+ # guidance_scale=3.0,
799
+ # ).images[0]
800
+ # return image
801
 
802
+ # hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
803
+ # hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
804
+ # hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
805
 
806
+ # def update_images():
807
+ # image_1 = generate_image(hardcoded_prompt_1)
808
+ # image_2 = generate_image(hardcoded_prompt_2)
809
+ # image_3 = generate_image(hardcoded_prompt_3)
810
+ # return image_1, image_2, image_3
811
 
812
 
813
 
 
1237
  gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
1238
 
1239
  chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!", placeholder="Hey Radar...!!")
1240
+ tts_choice = gr.Radio(label="Select TTS System", choices=["Alpha", "Beta"], value="Alpha")
1241
  retriever_button = gr.Button("Retriever")
1242
 
1243
  clear_button = gr.Button("Clear")
 
1278
  news_output = gr.HTML(value=fetch_local_news())
1279
  events_output = gr.HTML(value=fetch_local_events())
1280
 
1281
+ # with gr.Column():
1282
+ # image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
1283
+ # image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
1284
+ # image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
1285
 
1286
+ # refresh_button = gr.Button("Refresh Images")
1287
+ # refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
1288
 
1289
  demo.queue()
1290
  demo.launch(share=True)