Bils committed on
Commit
722b7cd
·
verified ·
1 Parent(s): 4d98101

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -20
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  # Import necessary libraries
2
  import os
3
  import tempfile
@@ -9,7 +12,6 @@ from diffusers import DiffusionPipeline
9
  import google.generativeai as genai
10
  from pathlib import Path
11
 
12
-
13
  # Load environment variables from .env file
14
  load_dotenv()
15
 
@@ -24,12 +26,12 @@ def analyze_image_with_gemini(image_file):
24
  Analyzes an uploaded image with Gemini and generates a descriptive caption.
25
  """
26
  try:
27
- # Save uploaded image to a temporary file
28
  temp_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
29
  with open(temp_image_path, "wb") as temp_file:
30
  temp_file.write(image_file)
31
 
32
- # Prepare the image data and prompt for Gemini
33
  image_parts = [{"mime_type": "image/jpeg", "data": Path(temp_image_path).read_bytes()}]
34
  prompt_parts = ["Describe precisely the image in one sentence.\n", image_parts[0], "\n"]
35
  generation_config = {"temperature": 0.05, "top_p": 1, "top_k": 26, "max_output_tokens": 4096}
@@ -50,11 +52,11 @@ def get_audioldm_from_caption(caption):
50
  """
51
  Generates sound from a caption using the AudioLDM-2 model.
52
  """
53
- # Initialize the model
54
  pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2", use_auth_token=hf_token)
55
  pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
56
 
57
- # Generate audio from the caption
58
  audio_output = pipe(prompt=caption, num_inference_steps=50, guidance_scale=7.5)
59
  audio = audio_output.audios[0]
60
 
@@ -68,22 +70,22 @@ css="""
68
  #col-container{
69
  margin: 0 auto;
70
  max-width: 800px;
71
- }
72
-
73
  """
74
 
75
  # Gradio interface setup
 
76
  with gr.Blocks(css=css) as demo:
77
  # Main Title and App Description
78
  with gr.Column(elem_id="col-container"):
79
  gr.HTML("""
80
- <h1 style="text-align: center;">
81
- 🎶 Generate Sound Effects from Image
82
  </h1>
83
- <p style="text-align: center;">
84
- ⚡ Powered by <a href="https://bilsimaging.com" _blank >Bilsimaging</a>
85
  </p>
86
- """)
87
 
88
  gr.Markdown("""
89
  Welcome to this unique sound effect generator! This tool allows you to upload an image and generate a descriptive caption and a corresponding sound effect. Whether you're exploring the sound of nature, urban environments, or anything in between, this app brings your images to auditory life.
@@ -95,26 +97,28 @@ with gr.Blocks(css=css) as demo:
95
 
96
  Enjoy the journey from visual to auditory sensation with just a few clicks!
97
 
98
- For Example Demos sound effects generated , check out our [YouTube channel](https://www.youtube.com/playlist?list=PLwEbW4bdYBSC8exiJ9PfzufGND_14f--C)
99
  """)
100
 
101
- # Interface Components
102
  image_upload = gr.File(label="Upload Image", type="binary")
103
  generate_description_button = gr.Button("Tap to Generate a Description from your image")
104
  caption_display = gr.Textbox(label="Image Description", interactive=False) # Keep as read-only
105
  generate_sound_button = gr.Button("Generate Sound Effect")
106
  audio_output = gr.Audio(label="Generated Sound Effect")
107
- # extra footer
 
108
  gr.Markdown("""## 👥 How You Can Contribute
109
  We welcome contributions and suggestions for improvements. Your feedback is invaluable to the continuous enhancement of this application.
110
 
111
  For support, questions, or to contribute, please contact us at [[email protected]](mailto:[email protected]).
112
 
113
  Support our work and get involved by donating through [Ko-fi](https://ko-fi.com/bilsimaging). - Bilel Aroua
114
- """)
115
  gr.Markdown("""## 📢 Stay Connected
116
- this app is a testament to the creative possibilities that emerge when technology meets art. Enjoy exploring the auditory landscape of your images!
117
- """)
 
118
  # Function to update the caption display based on the uploaded image
119
  def update_caption(image_file):
120
  description, _ = analyze_image_with_gemini(image_file)
@@ -137,7 +141,5 @@ with gr.Blocks(css=css) as demo:
137
  outputs=audio_output
138
  )
139
 
140
-
141
-
142
  # Launch the Gradio app
143
  demo.launch(debug=True, share=True)
 
1
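+ To use ZeroGPU on Hugging Face Spaces, import the `spaces` package and apply the `@spaces.GPU` decorator to each function in your Gradio app that needs a GPU; the decorator marks GPU-dependent functions (it does not indicate that no GPU is required), and a GPU is allocated only while such a function runs. Here is how you can modify your script to include this: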
2
+
3
+ ```python
4
  # Import necessary libraries
5
  import os
6
  import tempfile
 
12
  import google.generativeai as genai
13
  from pathlib import Path
14
 
 
15
  # Load environment variables from .env file
16
  load_dotenv()
17
 
 
26
  Analyzes an uploaded image with Gemini and generates a descriptive caption.
27
  """
28
  try:
29
+ # Save uploaded image to a temporary file
30
  temp_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
31
  with open(temp_image_path, "wb") as temp_file:
32
  temp_file.write(image_file)
33
 
34
+ # Prepare the image data and prompt for Gemini
35
  image_parts = [{"mime_type": "image/jpeg", "data": Path(temp_image_path).read_bytes()}]
36
  prompt_parts = ["Describe precisely the image in one sentence.\n", image_parts[0], "\n"]
37
  generation_config = {"temperature": 0.05, "top_p": 1, "top_k": 26, "max_output_tokens": 4096}
 
52
  """
53
  Generates sound from a caption using the AudioLDM-2 model.
54
  """
55
+ # Initialize the model
56
  pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2", use_auth_token=hf_token)
57
  pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
58
 
59
+ # Generate audio from the caption
60
  audio_output = pipe(prompt=caption, num_inference_steps=50, guidance_scale=7.5)
61
  audio = audio_output.audios[0]
62
 
 
70
  #col-container{
71
  margin: 0 auto;
72
  max-width: 800px;
73
+ }
 
74
  """
75
 
76
  # Gradio interface setup
77
+ @spaces.GPU
78
  with gr.Blocks(css=css) as demo:
79
  # Main Title and App Description
80
  with gr.Column(elem_id="col-container"):
81
  gr.HTML("""
82
+ <h1 style="text-align: center;">
83
+ 🎶 Generate Sound Effects from Image
84
  </h1>
85
+ <p style="text-align: center;">
86
+ ⚡ Powered by <a href="https://bilsimaging.com" _blank >Bilsimaging</a>
87
  </p>
88
+ """)
89
 
90
  gr.Markdown("""
91
  Welcome to this unique sound effect generator! This tool allows you to upload an image and generate a descriptive caption and a corresponding sound effect. Whether you're exploring the sound of nature, urban environments, or anything in between, this app brings your images to auditory life.
 
97
 
98
  Enjoy the journey from visual to auditory sensation with just a few clicks!
99
 
100
+ For example demo sound effects generated, check out our [YouTube channel](https://www.youtube.com/playlist?list=PLwEbW4bdYBSC8exiJ9PfzufGND_14f--C)
101
  """)
102
 
103
+ # Interface Components
104
  image_upload = gr.File(label="Upload Image", type="binary")
105
  generate_description_button = gr.Button("Tap to Generate a Description from your image")
106
  caption_display = gr.Textbox(label="Image Description", interactive=False) # Keep as read-only
107
  generate_sound_button = gr.Button("Generate Sound Effect")
108
  audio_output = gr.Audio(label="Generated Sound Effect")
109
+
110
+ # Extra footer
111
  gr.Markdown("""## 👥 How You Can Contribute
112
  We welcome contributions and suggestions for improvements. Your feedback is invaluable to the continuous enhancement of this application.
113
 
114
  For support, questions, or to contribute, please contact us at [[email protected]](mailto:[email protected]).
115
 
116
  Support our work and get involved by donating through [Ko-fi](https://ko-fi.com/bilsimaging). - Bilel Aroua
117
+ """)
118
  gr.Markdown("""## 📢 Stay Connected
119
+ This app is a testament to the creative possibilities that emerge when technology meets art. Enjoy exploring the auditory landscape of your images!
120
+ """)
121
+
122
  # Function to update the caption display based on the uploaded image
123
  def update_caption(image_file):
124
  description, _ = analyze_image_with_gemini(image_file)
 
141
  outputs=audio_output
142
  )
143
 
 
 
144
  # Launch the Gradio app
145
  demo.launch(debug=True, share=True)