ylacombe committed on
Commit
472b521
·
1 Parent(s): 9670638

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client
2
+ import torch
3
+ import nltk # we'll use this to split into sentences
4
+ import numpy as np
5
+ from transformers import BarkModel, AutoProcessor
6
+
7
+ import gradio as gr
8
+
9
+ def _grab_best_device(use_gpu=True):
10
+ if torch.cuda.device_count() > 0 and use_gpu:
11
+ device = "cuda"
12
+ else:
13
+ device = "cpu"
14
+ return device
15
+
16
# Device used for the speech model ("cuda" when available, else "cpu").
device = _grab_best_device()


# Number of sentences grouped into one speech-generation batch.
BATCH_SIZE = 8

# System prompt sent to the remote text-generation endpoint.
SYST_PROMPT = (
    "You're the storyteller, crafting a short tale for young listeners. Please abide by these guidelines:\n"
    "- Keep your sentences concise and easy to understand.\n"
    "- There should be only the narrator speaking. No dialogues."
)

# Example story prompts (the UI supplies its own prompt at request time).
#story_prompt = "A panda going on an adventure with a caterpillar. This is a story teaching a wonderful life lesson."
story_prompt = "A princess breaks free from a dragon's grip. This evocates women empowerement and freedom."

# Sampling parameters for story generation.
temperature = 0.9
top_p = 0.6
repetition_penalty = 1.2

# Remote Gradio apps: one for text generation, one for image generation.
text_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
image_client = Client("prodia/fast-stable-diffusion")

# Settings for the image client.
image_negative_prompt = "ultrarealistic, soft lighting, 8k, ugly"
image_positive_prompt = ". Cartoon, anime"
image_seed = 9
35
+
36
# Bark text-to-speech: the processor turns text (plus a voice preset) into
# model inputs, the model turns those inputs into a waveform.
processor = AutoProcessor.from_pretrained("suno/bark")

# Half precision is only requested on GPU; on the CPU fallback returned by
# _grab_best_device() the model is loaded in float32 instead.
_model_dtype = torch.float16 if device == "cuda" else torch.float32
model = BarkModel.from_pretrained("suno/bark", torch_dtype=_model_dtype).to(device)
sampling_rate = model.generation_config.sample_rate
silence = np.zeros(int(0.25 * sampling_rate))  # quarter second of silence
voice_preset = "v2/en_speaker_6"

# convert to bettertransformer
model = model.to_bettertransformer()

# enable CPU offload: sub-models are parked on the CPU and moved to the GPU
# only while they run, so this is only meaningful when a GPU is in use.
if device == "cuda":
    model.enable_cpu_offload()
47
+
48
+
49
+
50
def generate_audio_and_image(story_prompt, voice_preset=voice_preset):
    """Write a story for ``story_prompt`` and narrate it with Bark.

    The story text is requested from the remote text-generation app, split
    into sentences, and synthesized in batches of ``BATCH_SIZE`` sentences.
    A quarter second of silence is appended after every batch.

    Args:
        story_prompt: Free-text description of the story to generate.
        voice_preset: Bark voice preset forwarded to the processor.

    Returns:
        ``(story, (sampling_rate, waveform))`` where ``story`` is the
        generated text and ``waveform`` is the concatenated narration as a
        NumPy array. If the story contains no sentences, the waveform is a
        single stretch of silence.
    """
    # Positional inputs of the remote "/chat" endpoint. The fifth slot is the
    # nucleus-sampling value: it previously received `temperature` a second
    # time, which left `top_p` unused.
    # NOTE(review): 4096 is presumably the max-new-tokens limit — confirm
    # against the endpoint's API page.
    story = text_client.predict(
        story_prompt,
        SYST_PROMPT,
        temperature,
        4096,
        top_p,
        repetition_penalty,
        api_name="/chat",
    )

    print(story)

    sentences = nltk.sent_tokenize(story.replace("\n", " ").strip())

    pieces = []
    # `start` already advances by BATCH_SIZE, so it is used directly as the
    # slice offset; slicing clamps at the end of the list on the last batch.
    for start in range(0, len(sentences), BATCH_SIZE):
        batch = sentences[start:start + BATCH_SIZE]
        inputs = processor(batch, voice_preset=voice_preset)
        speech_output = model.generate(**inputs.to(device)).cpu().numpy()
        # One waveform per sentence in the batch, then a short pause.
        pieces += [*speech_output, silence.copy()]

    # TODO: image generation is currently disabled. The previous call was
    #   image_client.submit(
    #       story_prompt + image_positive_prompt, image_negative_prompt,
    #       "absolutereality_v181.safetensors [3d9d4d2b]",
    #       25, "Euler a", 7, 512, 512, image_seed, fn_index=0)
    # followed by `img = job_img.result()`.

    if not pieces:
        # np.concatenate rejects an empty list; return valid (silent) audio.
        return story, (sampling_rate, silence.copy())

    return story, (sampling_rate, np.concatenate(pieces))
97
+
98
+
99
+
100
+
101
# Gradio blocks demo: one text box for the story prompt, a button that runs
# generate_audio_and_image, and text + audio outputs for the result.
with gr.Blocks() as demo_blocks:
    gr.Markdown("""<h1 align="center">🐶Children story</h1>""")
    gr.HTML("""<h3 style="text-align:center;">📢Audio Streaming powered by Gradio (v3.40.0 onwards)🦾! </h3>""")
    with gr.Group():
        with gr.Row():
            inp_text = gr.Textbox(label="Story prompt", info="Enter text here")
            # TODO: optional gr.Dropdown of available voice presets
            # (label "Available voice presets") is not wired up yet.

        with gr.Row():
            btn = gr.Button("Create a story")

        with gr.Row():
            # Streaming is off: the whole narration is returned at once and
            # starts playing automatically.
            out_audio = gr.Audio(streaming=False, autoplay=True)
            out_text = gr.Text()

    # The function returns (story, audio), matching this output order.
    btn.click(generate_audio_and_image, [inp_text], [out_text, out_audio])

demo_blocks.queue().launch(debug=True)