Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -711,7 +711,55 @@ def start_session(request: gr.Request):
|
|
711 |
def check_box_clicked(adapative_tick):
    """Return a Gradio update that flips interactivity opposite to the checkbox.

    Args:
        adapative_tick: Current checkbox value passed in by the change event.

    Returns:
        ``gr.update(interactive=...)`` with ``interactive`` set to the negation
        of ``adapative_tick``.
    """
    print("checkbox clicked")
    # A ticked checkbox locks the target component; unticked makes it editable.
    editable = not adapative_tick
    return gr.update(interactive=editable)
|
714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
715 |
|
716 |
css = """
|
717 |
#col-container {
|
@@ -762,6 +810,7 @@ with gr.Blocks(css=css) as demo:
|
|
762 |
|
763 |
image_input = gr.Image(label="Reference Image", type="filepath", height=512)
|
764 |
audio_input = ExtendedAudio(label="Input Audio", type="filepath", options=["EMPTY"], show_download_button=True)
|
|
|
765 |
|
766 |
|
767 |
with gr.Column():
|
@@ -771,8 +820,10 @@ with gr.Blocks(css=css) as demo:
|
|
771 |
|
772 |
time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
|
773 |
infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
|
774 |
-
|
775 |
-
|
|
|
|
|
776 |
|
777 |
with gr.Column():
|
778 |
|
@@ -875,6 +926,11 @@ with gr.Blocks(css=css) as demo:
|
|
875 |
audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
|
876 |
num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
|
877 |
adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
|
|
|
|
|
|
|
|
|
|
|
878 |
|
879 |
if __name__ == "__main__":
|
880 |
demo.unload(cleanup)
|
|
|
711 |
def check_box_clicked(adapative_tick):
    """Return a Gradio update that flips interactivity opposite to the checkbox.

    Args:
        adapative_tick: Current checkbox value passed in by the change event.

    Returns:
        ``gr.update(interactive=...)`` with ``interactive`` set to the negation
        of ``adapative_tick``.
    """
    print("checkbox clicked")
    # A ticked checkbox locks the target component; unticked makes it editable.
    editable = not adapative_tick
    return gr.update(interactive=editable)
|
714 |
+
|
715 |
+
def preprocess_audio_first_5s_librosa(audio_path, limit_on, session_id=None):
    """
    Cap an uploaded audio file at its first 5 seconds.

    Args:
        audio_path: Path of the uploaded audio file; may be None or empty.
        limit_on: When falsy the limit is disabled and ``audio_path`` is
            returned untouched.
        session_id: Name of the per-session sub-directory created under
            ``os.environ["PROCESSED_RESULTS"]``. A random hex id is generated
            when it is None.

    Returns:
        ``audio_path`` unchanged when the limit is off or the clip is no
        longer than 5 seconds (within a 1 ms tolerance); ``None`` when the
        limit is on but no path was given; otherwise the path of a newly
        written mono 16-bit PCM WAV holding the first 5 seconds, resampled to
        ``args.sample_rate`` (16000 when that attribute is absent).

    Raises:
        KeyError: If trimming is needed and ``PROCESSED_RESULTS`` is not set
            in the environment.
    """
    max_seconds = 5.0
    tolerance = 1e-3

    if not limit_on:
        return audio_path
    if not audio_path:
        return None

    # Robust duration check (librosa changed arg name across versions)
    try:
        dur = librosa.get_duration(path=audio_path)
    except TypeError:
        dur = librosa.get_duration(filename=audio_path)

    # Clips at or just around the limit are passed through as-is: the
    # tolerance is added (not subtracted) so that a 4.9999 s or 5.0 s file is
    # not needlessly resampled and re-encoded.
    if dur <= max_seconds + tolerance:
        return audio_path

    if session_id is None:
        session_id = uuid.uuid4().hex

    # Where we'll store per-session processed audio
    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
    audio_dir = os.path.join(output_dir, "audio")
    os.makedirs(audio_dir, exist_ok=True)

    trimmed_path = os.path.join(audio_dir, "audio_input_5s.wav")
    sr = getattr(args, "sample_rate", 16000)

    # Load only the first 5 s as mono at the target sample rate
    y, _ = librosa.load(audio_path, sr=sr, mono=True, duration=max_seconds)

    # Save as 16-bit PCM mono WAV
    waveform = torch.from_numpy(y).unsqueeze(0)  # [1, num_samples]
    torchaudio.save(
        trimmed_path,
        waveform,
        sr,
        encoding="PCM_S",
        bits_per_sample=16,
        format="wav",
    )

    return trimmed_path
|
762 |
+
|
763 |
|
764 |
css = """
|
765 |
#col-container {
|
|
|
810 |
|
811 |
image_input = gr.Image(label="Reference Image", type="filepath", height=512)
|
812 |
audio_input = ExtendedAudio(label="Input Audio", type="filepath", options=["EMPTY"], show_download_button=True)
|
813 |
+
gr.Markdown("*A 5-second limit is applied to audio files to shorten generation time. You can turn this off in Advanced Settings*")
|
814 |
|
815 |
|
816 |
with gr.Column():
|
|
|
820 |
|
821 |
time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
|
822 |
infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
|
823 |
+
with gr.Accordion("Advanced Settings", open=False):
|
824 |
+
limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True)
|
825 |
+
adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
|
826 |
+
text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
|
827 |
|
828 |
with gr.Column():
|
829 |
|
|
|
926 |
audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
|
927 |
num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
|
928 |
adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
|
929 |
+
audio_input.upload(
|
930 |
+
fn=preprocess_audio_first_5s_librosa,
|
931 |
+
inputs=[audio_input, limit_on, session_state],
|
932 |
+
outputs=[audio_input],
|
933 |
+
)
|
934 |
|
935 |
if __name__ == "__main__":
|
936 |
demo.unload(cleanup)
|