shivalikasingh committed
Commit 8d89ea7 · verified · 1 parent: eda43db

Update app.py

Files changed (1): app.py (+68 −17)
app.py CHANGED
@@ -22,16 +22,19 @@ from elevenlabs.client import ElevenLabs
 from huggingface_hub import hf_hub_download
 from gradio.themes.utils import colors, fonts, sizes
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-from prompt_examples import TEXT_CHAT_EXAMPLES, IMG_GEN_PROMPT_EXAMPLES, AUDIO_EXAMPLES, TEXT_CHAT_EXAMPLES_LABELS, IMG_GEN_PROMPT_EXAMPLES_LABELS, AUDIO_EXAMPLES_LABELS
+from prompt_examples import TEXT_CHAT_EXAMPLES, IMG_GEN_PROMPT_EXAMPLES, AUDIO_EXAMPLES, TEXT_CHAT_EXAMPLES_LABELS, IMG_GEN_PROMPT_EXAMPLES_LABELS, AUDIO_EXAMPLES_LABELS, AYA_VISION_PROMPT_EXAMPLES
 from preambles import CHAT_PREAMBLE, AUDIO_RESPONSE_PREAMBLE, IMG_DESCRIPTION_PREAMBLE
 from constants import LID_LANGUAGES, NEETS_AI_LANGID_MAP, AYA_MODEL_NAME, BATCH_SIZE, USE_ELVENLABS, USE_REPLICATE
+from aya_vision_utils import get_aya_vision_response, get_aya_vision_prompt_example
+# from dotenv import load_dotenv
 
+# load_dotenv()
 
 HF_API_TOKEN = os.getenv("HF_API_KEY")
 ELEVEN_LABS_KEY = os.getenv("ELEVEN_LABS_KEY")
 NEETS_AI_API_KEY = os.getenv("NEETS_AI_API_KEY")
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-IMG_COHERE_API_KEY = os.getenv("IMG_COHERE_API_KEY")
+IMG_GEN_COHERE_API_KEY = os.getenv("IMG_GEN_COHERE_API_KEY")
 AUDIO_COHERE_API_KEY = os.getenv("AUDIO_COHERE_API_KEY")
 CHAT_COHERE_API_KEY = os.getenv("CHAT_COHERE_API_KEY")
@@ -39,7 +42,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Initialize cohere clients
 img_prompt_client = cohere.Client(
-    api_key=IMG_COHERE_API_KEY,
+    api_key=IMG_GEN_COHERE_API_KEY,
     client_name="c4ai-aya-expanse-img"
 )
 chat_client = cohere.Client(
@@ -326,6 +329,8 @@ def groq_whisper_tts(filename):
     return transcriptions.text
 
 
+
+
 # setup gradio app theme
 theme = gr.themes.Base(
     primary_hue=gr.themes.colors.teal,
@@ -334,8 +339,8 @@ theme = gr.themes.Base(
     text_size=gr.themes.sizes.text_lg,
 ).set(
     # Primary Button Color
-    button_primary_background_fill="#114A56",
-    button_primary_background_fill_hover="#114A56",
+    button_primary_background_fill="#2F70E3", #"#114A56",
+    button_primary_background_fill_hover="#2F70E3", #"#114A56",
     # Block Labels
     block_title_text_weight="600",
     block_label_text_weight="600",
@@ -348,13 +353,13 @@ demo = gr.Blocks(theme=theme, analytics_enabled=False)
 with demo:
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
-            gr.Image("AyaExpanse.png", elem_id="logo-img", show_label=False, show_share_button=False, show_download_button=False, show_fullscreen_button=False)
+            gr.Image("1.png", elem_id="logo-img", show_label=False, show_share_button=False, show_download_button=False, show_fullscreen_button=False)
         with gr.Column(scale=30):
-            gr.Markdown("""C4AI Aya Expanse is a state-of-art model with highly advanced capabilities to connect the world across languages.
+            gr.Markdown("""C4AI Aya model family covers state-of-art models like Aya Vision and Aya Expanse with highly advanced capabilities to connect the world across languages.
             <br/>
-            You can use this space to chat, speak and visualize with Aya Expanse in 23 languages.
+            You can use this space to chat, speak, visualize and see with Aya models in 23 languages.
             <br/>
-            **Model**: [aya-expanse-32B](https://huggingface.co/CohereForAI/aya-expanse-32b)
+            **Model**: [aya-vision-32b](https://huggingface.co/CohereForAI/aya-vision-32b), [aya-expanse-32B](https://huggingface.co/CohereForAI/aya-expanse-32b)
             <br/>
             **Developed by**: [Cohere for AI](https://cohere.com/research) and [Cohere](https://cohere.com/)
             <br/>
@@ -400,7 +405,6 @@ with demo:
             examples_per_page=25,
             run_on_click=True
         )
-
     # End to End Testing Pipeline for speak with Aya
     with gr.TabItem("Speak with Aya") as speak_with_aya:
 
@@ -410,6 +414,7 @@ with demo:
         e2_audio_submit_button = gr.Button(value="Get Aya's Response", variant="primary")
 
         clear_button_microphone = gr.ClearButton()
+
         gr.Examples(
             examples=AUDIO_EXAMPLES,
             inputs=e2e_audio_file,
@@ -425,19 +430,16 @@ with demo:
         e2e_aya_audio_response = gr.Audio(type="filepath", label="Aya's Audio Response")
 
         with gr.Accordion("See Details", open=False):
-            gr.Markdown("To enable voice interaction with Aya Expanse, this space uses [Whisper large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) and [Groq](https://groq.com/) for STT and [neets.ai](http://neets.ai/) for TTS.")
-
-
+            gr.Markdown("To enable voice interaction with Aya Expanse, this space uses [Whisper large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) and [Groq](https://groq.com/) for STT and [neets.ai](http://neets.ai/) for TTS.")
+
     # Generate Images
     with gr.TabItem("Visualize with Aya") as visualize_with_aya:
         with gr.Row():
             with gr.Column():
-                input_img_prompt = gr.Textbox(placeholder="Ask anything in our 23 languages ...", label="Describe an image", lines=3)
-                # generated_img_desc = gr.Textbox(label="Image Description generated by Aya", interactive=False, lines=3, visible=False)
+                input_img_prompt = gr.Textbox(placeholder="Ask anything in our 23 languages ...", label="Ask anything about an image", lines=3)
                 submit_button_img = gr.Button(value="Submit", variant="primary")
                 clear_button_img = gr.ClearButton()
 
-
             with gr.Column():
                 generated_img = gr.Image(label="Generated Image", interactive=False)
                 input_prompt_lang = gr.Textbox(visible=False)
@@ -465,7 +467,56 @@ with demo:
         with gr.Row():
             with gr.Accordion("See Details", open=False):
                 gr.Markdown("This space uses Aya Expanse for translating multilingual prompts and generating detailed image descriptions and [Flux Schnell](https://huggingface.co/black-forest-labs/FLUX.1-schnell) for Image Generation.")
+
+
+    # Generate Images
+    with gr.TabItem("Aya Vision") as see_with_aya:
+        with gr.Row():
+            with gr.Column():
+                aya_vision_prompt = gr.Textbox(placeholder="Ask anything in our 23 languages ...", label="Input Prompt", lines=3)
+                aya_vision_input_img = gr.Image(label="Input Image", interactive=True, type="filepath")
+                submit_aya_vision = gr.Button(value="Submit", variant="primary")
+                clear_button_aya_vision = gr.ClearButton()
+            with gr.Column():
+                aya_vision_response = gr.Textbox(lines=3,label="Aya Vision's Response", show_copy_button=True, container=True, interactive=False)
+                lang_textbox = gr.Textbox(visible=False)
+
+        with gr.Row():
+            gr.Examples(
+                examples=[[lang] for lang in AYA_VISION_PROMPT_EXAMPLES.keys()],
+                inputs=lang_textbox,
+                outputs=[aya_vision_prompt, aya_vision_input_img],
+                fn=get_aya_vision_prompt_example,
+                label="Load example prompt for:",
+                examples_per_page=25,
+                run_on_click=True
+            )
+
+        # increase spacing between examples and Accordion components
+        with gr.Row():
+            pass
+        with gr.Row():
+            pass
+        with gr.Row():
+            pass
+
+        with gr.Row():
+            with gr.Accordion("See Details", open=False):
+                gr.Markdown("This space uses [Aya Vision](https://huggingface.co/CohereForAI/aya-vision-32b) for understanding images.")
+
 
+    # Aya Vision
+    clear_button_aya_vision.click(lambda: None, None, aya_vision_input_img)
+    clear_button_aya_vision.click(lambda: None, None, aya_vision_prompt)
+    clear_button_aya_vision.click(lambda: None, None, aya_vision_response)
+
+    submit_aya_vision.click(
+        get_aya_vision_response,
+        inputs=[aya_vision_prompt, aya_vision_input_img],
+        outputs=[aya_vision_response]
+    )
+
+
     # Image Generation
     clear_button_img.click(lambda: None, None, input_img_prompt)
     clear_button_img.click(lambda: None, None, generated_img_desc)
@@ -509,5 +560,5 @@ with demo:
     )
 
     demo.load(lambda: secrets.token_hex(16), None, token)
-
+
 demo.queue(api_open=False, max_size=20, default_concurrency_limit=4).launch(show_api=False, allowed_paths=['/home/user/app'])
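A note on the first two hunks: the Cohere key for the image-prompt client was renamed from IMG_COHERE_API_KEY to IMG_GEN_COHERE_API_KEY, so the Space's secret must be renamed to match; otherwise os.getenv returns None and img_prompt_client is constructed with an empty key that only fails once a request is made. A fail-fast guard along these lines (not part of this commit, purely illustrative) would surface the misconfiguration at startup:

```python
import os

# Hypothetical startup guard (not in this commit): fail fast when the renamed
# secret is missing instead of letting the first Cohere request error out.
IMG_GEN_COHERE_API_KEY = os.getenv("IMG_GEN_COHERE_API_KEY")
if not IMG_GEN_COHERE_API_KEY:
    raise RuntimeError(
        "IMG_GEN_COHERE_API_KEY is not set; the secret was renamed from "
        "IMG_COHERE_API_KEY, so update the Space settings to match."
    )
```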
 
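Likewise, submit_aya_vision.click hands the prompt string and the uploaded image's filepath (the tab uses gr.Image(type="filepath")) to get_aya_vision_response, whose implementation lives in aya_vision_utils outside this diff. A plausible sketch, assuming the Cohere v2 chat API with a base64 data-URL image block and the c4ai-aya-vision-32b model (the environment-variable name, MIME handling, and error behavior here are all assumptions):

```python
import base64
import os

import cohere

# Hypothetical client setup; the real module may reuse one of app.py's keys.
aya_vision_client = cohere.ClientV2(api_key=os.getenv("CHAT_COHERE_API_KEY"))

def get_aya_vision_response(prompt: str, image_path: str) -> str:
    """Send the user's prompt plus the uploaded image to Aya Vision and
    return the model's text reply for the response Textbox."""
    # Encode the uploaded file as a data URL (JPEG assumed for brevity;
    # real code should derive the MIME type from the file).
    with open(image_path, "rb") as f:
        data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()
    response = aya_vision_client.chat(
        model="c4ai-aya-vision-32b",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }],
    )
    return response.message.content[0].text
```

Returning a single string matches the handler's outputs=[aya_vision_response] wiring.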