import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip

# Assumes moviepy 1.x (the moviepy.editor module) and a Gradio version that
# supports gr.Audio(type="filepath"); adjust if your installed versions differ.


def draw_waveform_overlay(audio_clip, width, height):
    """Render a static amplitude waveform onto a transparent RGBA image."""
    samples = audio_clip.to_soundarray(fps=22050)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)  # average stereo channels down to mono
    # One peak amplitude per pixel column, normalized to the 0..1 range
    chunks = np.array_split(np.abs(samples), width)
    peaks = np.array([chunk.max() if chunk.size else 0.0 for chunk in chunks])
    peaks = peaks / max(peaks.max(), 1e-6)
    # Draw one vertical bar per column, mirrored around the horizontal midline
    overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    mid = height // 2
    for x, peak in enumerate(peaks):
        half = int(peak * mid)
        draw.line([(x, mid - half), (x, mid + half)], fill=(255, 255, 255, 180))
    return overlay


def create_waveform_video(image, audio_path):
    # Gradio passes the image as a PIL.Image and the audio as a file path
    image_path = tempfile.mktemp(suffix=".png")
    overlay_path = tempfile.mktemp(suffix=".png")
    video_path = tempfile.mktemp(suffix=".mp4")
    image.save(image_path)

    audio_clip = AudioFileClip(audio_path)

    # Background: the uploaded image, held for the full duration of the audio
    img_clip = ImageClip(image_path).set_duration(audio_clip.duration)
    img_clip = img_clip.resize(height=720)  # scale to a suitable video height
    width, height = (int(v) for v in img_clip.size)

    # Waveform overlay drawn across the bottom quarter of the frame
    draw_waveform_overlay(audio_clip, width, height // 4).save(overlay_path)
    waveform_clip = (
        ImageClip(overlay_path)
        .set_duration(audio_clip.duration)
        .set_position(("center", "bottom"))
    )

    # Composite image + waveform, attach the audio track, and render to MP4
    final_clip = CompositeVideoClip([img_clip, waveform_clip]).set_audio(audio_clip)
    final_clip.write_videofile(video_path, codec="libx264", audio_codec="aac", fps=24)

    # Clean up intermediates; the video file itself is returned to Gradio
    audio_clip.close()
    os.remove(image_path)
    os.remove(overlay_path)
    return video_path


iface = gr.Interface(
    fn=create_waveform_video,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Image + Audio to Video with Waveform Overlay",
)

iface.launch()