File size: 4,158 Bytes
372395e caed802 b19e4a9 f694503 04d2706 caed802 372395e caed802 372395e 41b5a1b eb1af87 b6e8417 cb934a1 8b282ff 41b5a1b cb934a1 36b4db6 41b5a1b 36b4db6 b5357a4 41b5a1b 372395e 79b4496 48c215d 79b4496 e7c2915 12bd467 e7c2915 79b4496 372395e 79b4496 372395e 41b5a1b 56173d6 8e6038a f694503 372395e 570b690 79b4496 53f5458 79b4496 fdef21e 41b5a1b bc6b39c 372395e a63d987 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import gradio as gr
import os
import time
from moviepy.editor import *
from share_btn import community_icon_html, loading_icon_html, share_js
# Hugging Face API token, read from the environment; used to access the
# remote audio-generation Space (may be None if HF_TOKEN is unset).
token = os.environ.get('HF_TOKEN')
# Remote Gradio Spaces loaded as callables:
#   caption:   image filepath -> text caption (GPT2-based captioner)
#   audio_gen: text prompt (+ generation params) -> audio
caption = gr.Blocks.load(name="spaces/SRDdev/Image-Caption")
audio_gen = gr.Blocks.load(name="spaces/fffiloni/audioldm-text-to-audio-generation-clone", api_key=token)
# Default placeholder text for the manual-caption textbox.
ph_message="If you're not happy with sound result, you can manually describe the scene depicted in your image :)"
def infer(image_input, manual_caption, duration_in):
    """Generate a sound effect for an image.

    If `manual_caption` is empty, a caption is produced from the image via
    the remote Image-Caption Space; otherwise the manual caption is used
    as-is. The caption is then sent to the AudioLDM Space to synthesize
    audio of the requested duration.

    Parameters
    ----------
    image_input : str
        Filepath of the input image (gr.Image with type="filepath").
    manual_caption : str
        Optional user-supplied scene description; "" means auto-caption.
    duration_in : int
        Requested audio duration in seconds (UI slider: 5-30, step 5).

    Returns
    -------
    tuple
        (caption text, audio filepath, Textbox placeholder update,
         share-group visibility update) — matches the outputs wired to
        generate.click().
    """
    print(duration_in)
    if manual_caption == "":
        # No manual description: ask the remote captioning Space.
        cap = caption(image_input, fn_index=0)
        print("gpt2 caption: " + cap)
        # Fixed typo: was "GP2 Caption" — the app advertises GPT2 captioning.
        ph_update = "GPT2 Caption: " + cap
    else:
        cap = manual_caption
        # Fixed typo in log message: was "manual captiony".
        print("manual caption: " + cap)
        ph_update = ""
    # Extra positional args are the remote Space's generation parameters
    # (presumably guidance scale 2.5, seed 45, 3 candidates — confirm
    # against the AudioLDM Space's fn_index=0 signature).
    sound = audio_gen(cap, duration_in, 2.5, 45, 3, fn_index=0)
    # sound[1] is the audio filepath component of the remote result.
    return cap, sound[1], gr.Textbox.update(placeholder=f"{ph_update} {ph_message}"), gr.Group.update(visible=True)
title = """
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Image to Sound Effect
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Convert an image to a corresponding sound effect generated through GPT2 Image Captioning & AudioLDM
</p>
</div>
"""
article = """
<div class="footer">
<p>
Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates π€
</p>
</div>
<div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
<p>You may also like: </p>
<div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
<svg height="20" width="208" style="margin-left:4px;margin-bottom: 6px;">
<a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation" target="_blank">
<image href="https://img.shields.io/badge/π€ Spaces-AudioLDM_Text_to_Audio-blue" src="https://img.shields.io/badge/π€ Spaces-AudioLDM_Text_to_Audio-blue.png" height="20"/>
</a>
</svg>
<svg height="20" width="122" style="margin-left:4px;margin-bottom: 6px;">
<a href="https://huggingface.co/spaces/fffiloni/spectrogram-to-music" target="_blank">
<image href="https://img.shields.io/badge/π€ Spaces-Riffusion-blue" src="https://img.shields.io/badge/π€ Spaces-Riffusion-blue.png" height="20"/>
</a>
</svg>
</div>
</div>
"""
# --- UI layout and event wiring (Gradio 3.x Blocks API) ---
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        # Inputs: image to caption, optional manual description, duration.
        input_img = gr.Image(type="filepath", elem_id="input-img")
        manual_cap = gr.Textbox(label="Manual Image description (optional)", lines=2, placeholder=ph_message)
        duration_in = gr.Slider(minimum=5, maximum=30, step=5, value=10, label="Duration")
        # Outputs: generated caption (hidden by default) and the audio result.
        caption_output = gr.Textbox(label="Caption", visible=False, elem_id="text-caption")
        sound_output = gr.Audio(label="Result", elem_id="sound-output")
        generate = gr.Button("Generate SFX from Image")
        # Community share widget; infer() makes it visible after a result.
        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            community_icon = gr.HTML(community_icon_html)
            loading_icon = gr.HTML(loading_icon_html)
            share_button = gr.Button("Share to community", elem_id="share-btn")
        gr.HTML(article)
    # Main pipeline: image (+ optional caption, duration) -> caption + audio.
    # Also updates the textbox placeholder and reveals the share group.
    generate.click(infer, inputs=[input_img, manual_cap, duration_in], outputs=[caption_output, sound_output, manual_cap, share_group], api_name="i2fx")
    # Client-side only share action (runs share_js in the browser).
    share_button.click(None, [], [], _js=share_js)
# Queue requests (remote inference is slow) and launch the app.
demo.queue(max_size=32).launch(debug=True)
|