Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -5,37 +5,35 @@ import gradio as gr
|
|
5 |
from dotenv import load_dotenv
|
6 |
import torch
|
7 |
from scipy.io.wavfile import write
|
8 |
-
from diffusers import DiffusionPipeline
|
9 |
from transformers import pipeline
|
10 |
from pathlib import Path
|
11 |
|
12 |
load_dotenv()
|
13 |
hf_token = os.getenv("HF_TKN")
|
14 |
|
15 |
-
|
|
|
16 |
captioning_pipeline = pipeline(
|
17 |
"image-to-text",
|
18 |
-
model="nlpconnect/vit-gpt2-image-captioning"
|
|
|
19 |
)
|
20 |
|
21 |
pipe = DiffusionPipeline.from_pretrained(
|
22 |
"cvssp/audioldm2",
|
23 |
use_auth_token=hf_token
|
24 |
)
|
|
|
25 |
|
26 |
@spaces.GPU(duration=120)
|
27 |
def analyze_image_with_free_model(image_file):
|
28 |
try:
|
29 |
-
# Move captioning pipeline to GPU
|
30 |
-
captioning_pipeline.to("cuda")
|
31 |
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
|
32 |
temp_file.write(image_file)
|
33 |
temp_image_path = temp_file.name
|
34 |
|
35 |
results = captioning_pipeline(temp_image_path)
|
36 |
-
# Move back to CPU (optional)
|
37 |
-
captioning_pipeline.to("cpu")
|
38 |
-
|
39 |
if not results or not isinstance(results, list):
|
40 |
return "Error: Could not generate caption.", True
|
41 |
|
@@ -50,7 +48,6 @@ def analyze_image_with_free_model(image_file):
|
|
50 |
@spaces.GPU(duration=120)
|
51 |
def get_audioldm_from_caption(caption):
|
52 |
try:
|
53 |
-
# Move AudioLDM pipeline to GPU
|
54 |
pipe.to("cuda")
|
55 |
audio_output = pipe(
|
56 |
prompt=caption,
|
@@ -61,7 +58,7 @@ def get_audioldm_from_caption(caption):
|
|
61 |
audio = audio_output.audios[0]
|
62 |
|
63 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
|
64 |
-
write(temp_wav.name, 16000, audio)
|
65 |
return temp_wav.name
|
66 |
|
67 |
except Exception as e:
|
@@ -78,12 +75,10 @@ css = """
|
|
78 |
with gr.Blocks(css=css) as demo:
|
79 |
with gr.Column(elem_id="col-container"):
|
80 |
gr.HTML("""
|
81 |
-
<h1 style="text-align: center;">
|
82 |
-
|
83 |
-
</
|
84 |
-
|
85 |
-
⚡ Powered by <a href="https://bilsimaging.com" target="_blank">Bilsimaging</a>
|
86 |
-
</p>
|
87 |
""")
|
88 |
|
89 |
gr.Markdown("""
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
import torch
|
7 |
from scipy.io.wavfile import write
|
8 |
+
from diffusers import DiffusionPipeline
|
9 |
from transformers import pipeline
|
10 |
from pathlib import Path
|
11 |
|
12 |
load_dotenv()
|
13 |
hf_token = os.getenv("HF_TKN")
|
14 |
|
15 |
+
device_id = 0 if torch.cuda.is_available() else -1
|
16 |
+
|
17 |
captioning_pipeline = pipeline(
|
18 |
"image-to-text",
|
19 |
+
model="nlpconnect/vit-gpt2-image-captioning",
|
20 |
+
device=device_id
|
21 |
)
|
22 |
|
23 |
pipe = DiffusionPipeline.from_pretrained(
|
24 |
"cvssp/audioldm2",
|
25 |
use_auth_token=hf_token
|
26 |
)
|
27 |
+
# The AudioLDM pipeline can be moved to CUDA/CPU explicitly inside the function.
|
28 |
|
29 |
@spaces.GPU(duration=120)
|
30 |
def analyze_image_with_free_model(image_file):
|
31 |
try:
|
|
|
|
|
32 |
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
|
33 |
temp_file.write(image_file)
|
34 |
temp_image_path = temp_file.name
|
35 |
|
36 |
results = captioning_pipeline(temp_image_path)
|
|
|
|
|
|
|
37 |
if not results or not isinstance(results, list):
|
38 |
return "Error: Could not generate caption.", True
|
39 |
|
|
|
48 |
@spaces.GPU(duration=120)
|
49 |
def get_audioldm_from_caption(caption):
|
50 |
try:
|
|
|
51 |
pipe.to("cuda")
|
52 |
audio_output = pipe(
|
53 |
prompt=caption,
|
|
|
58 |
audio = audio_output.audios[0]
|
59 |
|
60 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
|
61 |
+
write(temp_wav.name, 16000, audio)
|
62 |
return temp_wav.name
|
63 |
|
64 |
except Exception as e:
|
|
|
75 |
with gr.Blocks(css=css) as demo:
|
76 |
with gr.Column(elem_id="col-container"):
|
77 |
gr.HTML("""
|
78 |
+
<h1 style="text-align: center;">🎶 Generate Sound Effects from Image</h1>
|
79 |
+
<p style="text-align: center;">
|
80 |
+
⚡ Powered by <a href="https://bilsimaging.com" target="_blank">Bilsimaging</a>
|
81 |
+
</p>
|
|
|
|
|
82 |
""")
|
83 |
|
84 |
gr.Markdown("""
|