Spaces:

1littlecoder
/

make_waveform

Runtime error

App Files Files Community

1littlecoder committed on Oct 31, 2024

Commit

e39102f

verified ·

1 Parent(s): 5a1209e

Create app.py

Browse files

Files changed (1) hide show

app.py +172 -0

app.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import gradio as gr
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation
+import tempfile
+import os
+import shutil
+import subprocess
+from typing import Any
+import PIL
+import processing_utils  # Import or define your custom processing utilities
+def make_waveform(
+    audio: tuple[int, np.ndarray],
+    bg_color: str = "#f3f4f6",
+    bg_image: str | None = None,
+    fg_alpha: float = 0.75,
+    bars_color: str | tuple[str, str] = ("#fbbf24", "#ea580c"),
+    bar_count: int = 50,
+    bar_width: float = 0.6,
+    animate: bool = False,
+) -> str:
+    if isinstance(audio, str):
+        audio_file = audio
+        audio = processing_utils.audio_from_file(audio)
+    else:
+        tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        processing_utils.audio_to_file(audio[0], audio[1], tmp_wav.name, format="wav")
+        audio_file = tmp_wav.name
+    if not os.path.isfile(audio_file):
+        raise ValueError("Audio file not found.")
+    ffmpeg = shutil.which("ffmpeg")
+    if not ffmpeg:
+        raise RuntimeError("ffmpeg not found.")
+    duration = round(len(audio[1]) / audio[0], 4)
+    def hex_to_rgb(hex_str):
+        return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]
+    def get_color_gradient(c1, c2, n):
+        if n < 1:
+            raise ValueError("Must have at least one stop in gradient")
+        c1_rgb = np.array(hex_to_rgb(c1)) / 255
+        c2_rgb = np.array(hex_to_rgb(c2)) / 255
+        mix_pcts = [x / (n - 1) for x in range(n)]
+        rgb_colors = [((1 - mix) * c1_rgb + (mix * c2_rgb)) for mix in mix_pcts]
+        return [
+            "#" + "".join(f"{int(round(val * 255)):02x}" for val in item)
+            for item in rgb_colors
+        ]
+    samples = audio[1]
+    if len(samples.shape) > 1:
+        samples = np.mean(samples, 1)
+    bins_to_pad = bar_count - (len(samples) % bar_count)
+    samples = np.pad(samples, [(0, bins_to_pad)])
+    samples = np.reshape(samples, (bar_count, -1))
+    samples = np.abs(samples)
+    samples = np.max(samples, 1)
+    color = (
+        bars_color
+        if isinstance(bars_color, str)
+        else get_color_gradient(bars_color[0], bars_color[1], bar_count)
+    )
+    fig = plt.figure(figsize=(5, 1), dpi=200, frameon=False)
+    plt.axis("off")
+    plt.margins(x=0)
+    bar_alpha = fg_alpha if animate else 1.0
+    barcollection = plt.bar(
+        np.arange(0, bar_count),
+        samples * 2,
+        bottom=(-1 * samples),
+        width=bar_width,
+        color=color,
+        alpha=bar_alpha,
+    )
+    tmp_img = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    savefig_kwargs: dict[str, Any] = {"bbox_inches": "tight"}
+    if bg_image is not None:
+        savefig_kwargs["transparent"] = True
+    else:
+        savefig_kwargs["facecolor"] = bg_color
+    plt.savefig(tmp_img.name, **savefig_kwargs)
+    if not animate:
+        waveform_img = PIL.Image.open(tmp_img.name)
+        waveform_img.save(tmp_img.name)
+    else:
+        def _animate(_):
+            for idx, b in enumerate(barcollection):
+                rand_height = np.random.uniform(0.8, 1.2)
+                b.set_height(samples[idx] * rand_height * 2)
+                b.set_y((-rand_height * samples)[idx])
+        frames = int(duration * 10)
+        anim = FuncAnimation(
+            fig,
+            _animate,
+            repeat=False,
+            blit=False,
+            frames=frames,
+            interval=100,
+        )
+        anim.save(tmp_img.name, writer="pillow", fps=10, codec="png", savefig_kwargs=savefig_kwargs)
+    output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+    ffmpeg_cmd = [
+        ffmpeg,
+        "-loop",
+        "1",
+        "-i",
+        tmp_img.name,
+        "-i",
+        audio_file,
+        "-vf",
+        f"color=c=#FFFFFF77:s=1000x400[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1",
+        "-t",
+        str(duration),
+        "-y",
+        output_mp4.name,
+    ]
+    subprocess.check_call(ffmpeg_cmd)
+    return output_mp4.name
+# Gradio app
+def generate_waveform(audio, bg_color, fg_alpha, bars_color, bar_count, bar_width, animate):
+    try:
+        video_path = make_waveform(
+            audio=(audio[0], np.array(audio[1])),
+            bg_color=bg_color,
+            fg_alpha=fg_alpha,
+            bars_color=bars_color,
+            bar_count=bar_count,
+            bar_width=bar_width,
+            animate=animate
+        )
+        return video_path
+    except Exception as e:
+        return str(e)
+with gr.Blocks() as demo:
+    gr.Markdown("### Audio Waveform Generator")
+    with gr.Row():
+        audio_input = gr.Audio(label="Upload Audio", source="upload", type="numpy")
+        video_output = gr.Video(label="Waveform Video")
+    with gr.Row():
+        bg_color = gr.ColorPicker(label="Background Color", value="#f3f4f6")
+        fg_alpha = gr.Slider(label="Foreground Opacity", minimum=0.1, maximum=1.0, value=0.75)
+        bar_count = gr.Slider(label="Number of Bars", minimum=10, maximum=100, step=1, value=50)
+        bar_width = gr.Slider(label="Bar Width", minimum=0.1, maximum=1.0, value=0.6)
+        bars_color = gr.ColorPicker(label="Bars Color", value="#fbbf24")
+        animate = gr.Checkbox(label="Animate", value=False)
+    generate_button = gr.Button("Generate Waveform")
+    generate_button.click(
+        generate_waveform,
+        inputs=[audio_input, bg_color, fg_alpha, bars_color, bar_count, bar_width, animate],
+        outputs=video_output
+    )
+demo.launch(debug = True)