Pijush2023 committed on
Commit
13eb1f5
·
verified ·
1 Parent(s): 564dc92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -57
app.py CHANGED
@@ -417,8 +417,8 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
417
  audio_future = executor.submit(generate_audio_elevenlabs, response)
418
  elif tts_choice == "Beta":
419
  audio_future = executor.submit(generate_audio_parler_tts, response)
420
- elif tts_choice == "Gamma":
421
- audio_future = executor.submit(generate_audio_mars5, response)
422
 
423
  for character in response:
424
  history[-1][1] += character
@@ -750,64 +750,64 @@ def generate_audio_parler_tts(text):
750
 
751
 
752
 
753
- # Load the MARS5 model
754
- mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)
755
 
756
- def generate_audio_mars5(text):
757
- description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
758
- kwargs_dict = {
759
- 'temperature': 0.2,
760
- 'top_k': -1,
761
- 'top_p': 0.2,
762
- 'typical_p': 1.0,
763
- 'freq_penalty': 2.6,
764
- 'presence_penalty': 0.4,
765
- 'rep_penalty_window': 100,
766
- 'max_prompt_phones': 360,
767
- 'deep_clone': True,
768
- 'nar_guidance_w': 3
769
- }
770
 
771
- chunks = chunk_text(preprocess(text))
772
- audio_segments = []
773
 
774
- for chunk in chunks:
775
- wav = torch.zeros(1, mars5.sr) # Use a placeholder silent audio for the reference
776
- cfg = config_class(**{k: kwargs_dict[k] for k in kwargs_dict if k in config_class.__dataclass_fields__})
777
- ar_codes, wav_out = mars5.tts(chunk, wav, "", cfg=cfg)
778
 
779
- temp_audio_path = os.path.join(tempfile.gettempdir(), f"mars5_audio_{len(audio_segments)}.wav")
780
- torchaudio.save(temp_audio_path, wav_out.unsqueeze(0), mars5.sr)
781
- audio_segments.append(AudioSegment.from_wav(temp_audio_path))
782
 
783
- combined_audio = sum(audio_segments)
784
- combined_audio_path = os.path.join(tempfile.gettempdir(), "mars5_combined_audio.wav")
785
- combined_audio.export(combined_audio_path, format="wav")
786
 
787
- logging.debug(f"Audio saved to {combined_audio_path}")
788
- return combined_audio_path
789
 
790
- pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
791
- pipe.to(device)
792
 
793
- def generate_image(prompt):
794
- with torch.cuda.amp.autocast():
795
- image = pipe(
796
- prompt,
797
- num_inference_steps=28,
798
- guidance_scale=3.0,
799
- ).images[0]
800
- return image
801
 
802
- hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
803
- hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
804
- hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
805
 
806
- def update_images():
807
- image_1 = generate_image(hardcoded_prompt_1)
808
- image_2 = generate_image(hardcoded_prompt_2)
809
- image_3 = generate_image(hardcoded_prompt_3)
810
- return image_1, image_2, image_3
811
 
812
 
813
 
@@ -1237,7 +1237,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1237
  gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
1238
 
1239
  chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!", placeholder="Hey Radar...!!")
1240
- tts_choice = gr.Radio(label="Select TTS System", choices=["Alpha", "Beta", "Gamma"], value="Alpha")
1241
  retriever_button = gr.Button("Retriever")
1242
 
1243
  clear_button = gr.Button("Clear")
@@ -1278,13 +1278,13 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
1278
  news_output = gr.HTML(value=fetch_local_news())
1279
  events_output = gr.HTML(value=fetch_local_events())
1280
 
1281
- with gr.Column():
1282
- image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
1283
- image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
1284
- image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
1285
 
1286
- refresh_button = gr.Button("Refresh Images")
1287
- refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
1288
 
1289
  demo.queue()
1290
  demo.launch(share=True)
 
417
  audio_future = executor.submit(generate_audio_elevenlabs, response)
418
  elif tts_choice == "Beta":
419
  audio_future = executor.submit(generate_audio_parler_tts, response)
420
+ # elif tts_choice == "Gamma":
421
+ # audio_future = executor.submit(generate_audio_mars5, response)
422
 
423
  for character in response:
424
  history[-1][1] += character
 
750
 
751
 
752
 
753
+ # # Load the MARS5 model
754
+ # mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)
755
 
756
+ # def generate_audio_mars5(text):
757
+ # description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
758
+ # kwargs_dict = {
759
+ # 'temperature': 0.2,
760
+ # 'top_k': -1,
761
+ # 'top_p': 0.2,
762
+ # 'typical_p': 1.0,
763
+ # 'freq_penalty': 2.6,
764
+ # 'presence_penalty': 0.4,
765
+ # 'rep_penalty_window': 100,
766
+ # 'max_prompt_phones': 360,
767
+ # 'deep_clone': True,
768
+ # 'nar_guidance_w': 3
769
+ # }
770
 
771
+ # chunks = chunk_text(preprocess(text))
772
+ # audio_segments = []
773
 
774
+ # for chunk in chunks:
775
+ # wav = torch.zeros(1, mars5.sr) # Use a placeholder silent audio for the reference
776
+ # cfg = config_class(**{k: kwargs_dict[k] for k in kwargs_dict if k in config_class.__dataclass_fields__})
777
+ # ar_codes, wav_out = mars5.tts(chunk, wav, "", cfg=cfg)
778
 
779
+ # temp_audio_path = os.path.join(tempfile.gettempdir(), f"mars5_audio_{len(audio_segments)}.wav")
780
+ # torchaudio.save(temp_audio_path, wav_out.unsqueeze(0), mars5.sr)
781
+ # audio_segments.append(AudioSegment.from_wav(temp_audio_path))
782
 
783
+ # combined_audio = sum(audio_segments)
784
+ # combined_audio_path = os.path.join(tempfile.gettempdir(), "mars5_combined_audio.wav")
785
+ # combined_audio.export(combined_audio_path, format="wav")
786
 
787
+ # logging.debug(f"Audio saved to {combined_audio_path}")
788
+ # return combined_audio_path
789
 
790
+ # pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
791
+ # pipe.to(device)
792
 
793
+ # def generate_image(prompt):
794
+ # with torch.cuda.amp.autocast():
795
+ # image = pipe(
796
+ # prompt,
797
+ # num_inference_steps=28,
798
+ # guidance_scale=3.0,
799
+ # ).images[0]
800
+ # return image
801
 
802
+ # hardcoded_prompt_1 = "Give a high quality photograph of a great looking red 2026 Toyota coupe against a skyline setting in the night, michael mann style in omaha enticing the consumer to buy this product"
803
+ # hardcoded_prompt_2 = "A vibrant and dynamic football game scene in the style of Peter Paul Rubens, showcasing the intense match between Alabama and Nebraska. The players are depicted with the dramatic, muscular physiques and expressive faces typical of Rubens' style. The Alabama team is wearing their iconic crimson and white uniforms, while the Nebraska team is in their classic red and white attire. The scene is filled with action, with players in mid-motion, tackling, running, and catching the ball. The background features a grand stadium filled with cheering fans, banners, and the natural landscape in the distance. The colors are rich and vibrant, with a strong use of light and shadow to create depth and drama. The overall atmosphere captures the intensity and excitement of the game, infused with the grandeur and dynamism characteristic of Rubens' work."
804
+ # hardcoded_prompt_3 = "Create a high-energy scene of a DJ performing on a large stage with vibrant lights, colorful lasers, a lively dancing crowd, and various electronic equipment in the background."
805
 
806
+ # def update_images():
807
+ # image_1 = generate_image(hardcoded_prompt_1)
808
+ # image_2 = generate_image(hardcoded_prompt_2)
809
+ # image_3 = generate_image(hardcoded_prompt_3)
810
+ # return image_1, image_2, image_3
811
 
812
 
813
 
 
1237
  gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
1238
 
1239
  chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!", placeholder="Hey Radar...!!")
1240
+ tts_choice = gr.Radio(label="Select TTS System", choices=["Alpha", "Beta"], value="Alpha")
1241
  retriever_button = gr.Button("Retriever")
1242
 
1243
  clear_button = gr.Button("Clear")
 
1278
  news_output = gr.HTML(value=fetch_local_news())
1279
  events_output = gr.HTML(value=fetch_local_events())
1280
 
1281
+ # with gr.Column():
1282
+ # image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
1283
+ # image_output_2 = gr.Image(value=generate_image(hardcoded_prompt_2), width=400, height=400)
1284
+ # image_output_3 = gr.Image(value=generate_image(hardcoded_prompt_3), width=400, height=400)
1285
 
1286
+ # refresh_button = gr.Button("Refresh Images")
1287
+ # refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3], api_name="update_image")
1288
 
1289
  demo.queue()
1290
  demo.launch(share=True)