Bils committed on
Commit
5b51bb7
·
verified ·
1 Parent(s): ea8ff59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -9,6 +9,7 @@ from diffusers import DiffusionPipeline
9
  import google.generativeai as genai
10
  from pathlib import Path
11
 
 
12
  # Load environment variables from .env file
13
  load_dotenv()
14
 
@@ -23,12 +24,12 @@ def analyze_image_with_gemini(image_file):
23
  Analyzes an uploaded image with Gemini and generates a descriptive caption.
24
  """
25
  try:
26
- # Save uploaded image to a temporary file
27
  temp_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
28
  with open(temp_image_path, "wb") as temp_file:
29
  temp_file.write(image_file)
30
 
31
- # Prepare the image data and prompt for Gemini
32
  image_parts = [{"mime_type": "image/jpeg", "data": Path(temp_image_path).read_bytes()}]
33
  prompt_parts = ["Describe precisely the image in one sentence.\n", image_parts[0], "\n"]
34
  generation_config = {"temperature": 0.05, "top_p": 1, "top_k": 26, "max_output_tokens": 4096}
@@ -49,11 +50,11 @@ def get_audioldm_from_caption(caption):
49
  """
50
  Generates sound from a caption using the AudioLDM-2 model.
51
  """
52
- # Initialize the model
53
  pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2", use_auth_token=hf_token)
54
  pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
55
 
56
- # Generate audio from the caption
57
  audio_output = pipe(prompt=caption, num_inference_steps=50, guidance_scale=7.5)
58
  audio = audio_output.audios[0]
59
 
@@ -67,22 +68,22 @@ css="""
67
  #col-container{
68
  margin: 0 auto;
69
  max-width: 800px;
70
- }
 
71
  """
72
 
73
  # Gradio interface setup
74
- @spaces.GPU
75
  with gr.Blocks(css=css) as demo:
76
  # Main Title and App Description
77
  with gr.Column(elem_id="col-container"):
78
  gr.HTML("""
79
- <h1 style="text-align: center;">
80
- 🎶 Generate Sound Effects from Image
81
  </h1>
82
- <p style="text-align: center;">
83
- âš¡ Powered by <a href="https://bilsimaging.com" _blank >Bilsimaging</a>
84
  </p>
85
- """)
86
 
87
  gr.Markdown("""
88
  Welcome to this unique sound effect generator! This tool allows you to upload an image and generate a descriptive caption and a corresponding sound effect. Whether you're exploring the sound of nature, urban environments, or anything in between, this app brings your images to auditory life.
@@ -94,28 +95,26 @@ with gr.Blocks(css=css) as demo:
94
 
95
  Enjoy the journey from visual to auditory sensation with just a few clicks!
96
 
97
- For example demo sound effects generated, check out our [YouTube channel](https://www.youtube.com/playlist?list=PLwEbW4bdYBSC8exiJ9PfzufGND_14f--C)
98
  """)
99
 
100
- # Interface Components
101
  image_upload = gr.File(label="Upload Image", type="binary")
102
  generate_description_button = gr.Button("Tap to Generate a Description from your image")
103
  caption_display = gr.Textbox(label="Image Description", interactive=False) # Keep as read-only
104
  generate_sound_button = gr.Button("Generate Sound Effect")
105
  audio_output = gr.Audio(label="Generated Sound Effect")
106
-
107
- # Extra footer
108
  gr.Markdown("""## 👥 How You Can Contribute
109
  We welcome contributions and suggestions for improvements. Your feedback is invaluable to the continuous enhancement of this application.
110
 
111
  For support, questions, or to contribute, please contact us at [[email protected]](mailto:[email protected]).
112
 
113
  Support our work and get involved by donating through [Ko-fi](https://ko-fi.com/bilsimaging). - Bilel Aroua
114
- """)
115
  gr.Markdown("""## 📢 Stay Connected
116
- This app is a testament to the creative possibilities that emerge when technology meets art. Enjoy exploring the auditory landscape of your images!
117
- """)
118
-
119
  # Function to update the caption display based on the uploaded image
120
  def update_caption(image_file):
121
  description, _ = analyze_image_with_gemini(image_file)
@@ -138,5 +137,7 @@ with gr.Blocks(css=css) as demo:
138
  outputs=audio_output
139
  )
140
 
 
 
141
  # Launch the Gradio app
142
  demo.launch(debug=True, share=True)
 
9
  import google.generativeai as genai
10
  from pathlib import Path
11
 
12
+
13
  # Load environment variables from .env file
14
  load_dotenv()
15
 
 
24
  Analyzes an uploaded image with Gemini and generates a descriptive caption.
25
  """
26
  try:
27
+ # Save uploaded image to a temporary file
28
  temp_image_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
29
  with open(temp_image_path, "wb") as temp_file:
30
  temp_file.write(image_file)
31
 
32
+ # Prepare the image data and prompt for Gemini
33
  image_parts = [{"mime_type": "image/jpeg", "data": Path(temp_image_path).read_bytes()}]
34
  prompt_parts = ["Describe precisely the image in one sentence.\n", image_parts[0], "\n"]
35
  generation_config = {"temperature": 0.05, "top_p": 1, "top_k": 26, "max_output_tokens": 4096}
 
50
  """
51
  Generates sound from a caption using the AudioLDM-2 model.
52
  """
53
+ # Initialize the model
54
  pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2", use_auth_token=hf_token)
55
  pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
56
 
57
+ # Generate audio from the caption
58
  audio_output = pipe(prompt=caption, num_inference_steps=50, guidance_scale=7.5)
59
  audio = audio_output.audios[0]
60
 
 
68
  #col-container{
69
  margin: 0 auto;
70
  max-width: 800px;
71
+ }
72
+
73
  """
74
 
75
  # Gradio interface setup
 
76
  with gr.Blocks(css=css) as demo:
77
  # Main Title and App Description
78
  with gr.Column(elem_id="col-container"):
79
  gr.HTML("""
80
+ <h1 style="text-align: center;">
81
+ 🎶 Generate Sound Effects from Image
82
  </h1>
83
+ <p style="text-align: center;">
84
+ âš¡ Powered by <a href="https://bilsimaging.com" _blank >Bilsimaging</a>
85
  </p>
86
+ """)
87
 
88
  gr.Markdown("""
89
  Welcome to this unique sound effect generator! This tool allows you to upload an image and generate a descriptive caption and a corresponding sound effect. Whether you're exploring the sound of nature, urban environments, or anything in between, this app brings your images to auditory life.
 
95
 
96
  Enjoy the journey from visual to auditory sensation with just a few clicks!
97
 
98
+ For Example Demos sound effects generated , check out our [YouTube channel](https://www.youtube.com/playlist?list=PLwEbW4bdYBSC8exiJ9PfzufGND_14f--C)
99
  """)
100
 
101
+ # Interface Components
102
  image_upload = gr.File(label="Upload Image", type="binary")
103
  generate_description_button = gr.Button("Tap to Generate a Description from your image")
104
  caption_display = gr.Textbox(label="Image Description", interactive=False) # Keep as read-only
105
  generate_sound_button = gr.Button("Generate Sound Effect")
106
  audio_output = gr.Audio(label="Generated Sound Effect")
107
+ # extra footer
 
108
  gr.Markdown("""## 👥 How You Can Contribute
109
  We welcome contributions and suggestions for improvements. Your feedback is invaluable to the continuous enhancement of this application.
110
 
111
  For support, questions, or to contribute, please contact us at [[email protected]](mailto:[email protected]).
112
 
113
  Support our work and get involved by donating through [Ko-fi](https://ko-fi.com/bilsimaging). - Bilel Aroua
114
+ """)
115
  gr.Markdown("""## 📢 Stay Connected
116
+ this app is a testament to the creative possibilities that emerge when technology meets art. Enjoy exploring the auditory landscape of your images!
117
+ """)
 
118
  # Function to update the caption display based on the uploaded image
119
  def update_caption(image_file):
120
  description, _ = analyze_image_with_gemini(image_file)
 
137
  outputs=audio_output
138
  )
139
 
140
+
141
+
142
  # Launch the Gradio app
143
  demo.launch(debug=True, share=True)