alexnasa committed on
Commit
9cfe43e
·
verified ·
1 Parent(s): 3df2527

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -3
app.py CHANGED
@@ -711,7 +711,55 @@ def start_session(request: gr.Request):
711
  def check_box_clicked(adapative_tick):
712
  print("checkbox clicked")
713
  return gr.update(interactive=not adapative_tick)
714
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
 
716
  css = """
717
  #col-container {
@@ -762,6 +810,7 @@ with gr.Blocks(css=css) as demo:
762
 
763
  image_input = gr.Image(label="Reference Image", type="filepath", height=512)
764
  audio_input = ExtendedAudio(label="Input Audio", type="filepath", options=["EMPTY"], show_download_button=True)
 
765
 
766
 
767
  with gr.Column():
@@ -771,8 +820,10 @@ with gr.Blocks(css=css) as demo:
771
 
772
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
773
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
774
- adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
775
- text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
 
 
776
 
777
  with gr.Column():
778
 
@@ -875,6 +926,11 @@ with gr.Blocks(css=css) as demo:
875
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
876
  num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
877
  adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
 
 
 
 
 
878
 
879
  if __name__ == "__main__":
880
  demo.unload(cleanup)
 
711
  def check_box_clicked(adapative_tick):
712
  print("checkbox clicked")
713
  return gr.update(interactive=not adapative_tick)
714
+
715
def preprocess_audio_first_5s_librosa(audio_path, limit_on, session_id=None):
    """
    Cap an uploaded audio file at its first 5 seconds.

    Args:
        audio_path: Path of the uploaded audio file; may be None or empty.
        limit_on: When falsy the limit is disabled and ``audio_path`` is
            returned untouched (whatever its value).
        session_id: Name of the per-session sub-directory created under
            ``$PROCESSED_RESULTS``. A random hex id is generated when None.

    Returns:
        ``audio_path`` unchanged when the limit is off or the clip is shorter
        than 5s; None when the limit is on and no path was given; otherwise
        the path of a new mono 16-bit PCM WAV holding the first 5s.

    Raises:
        KeyError: if trimming is needed and ``PROCESSED_RESULTS`` is not set
            in the environment.
    """
    limit_seconds = 5.0

    if not limit_on:
        return audio_path
    if not audio_path:
        return None

    # Robust duration check (librosa changed arg name across versions)
    try:
        dur = librosa.get_duration(path=audio_path)
    except TypeError:
        dur = librosa.get_duration(filename=audio_path)

    # Anything strictly shorter than the limit is passed through as-is.
    # The previous guard subtracted a tolerance (``dur < 5.0 - 1e-3``), which
    # re-encoded clips of 4.999s-5.0s even though nothing could be trimmed.
    if dur < limit_seconds:
        return audio_path

    if session_id is None:
        session_id = uuid.uuid4().hex

    # Where we'll store per-session processed audio
    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
    audio_dir = os.path.join(output_dir, "audio")
    os.makedirs(audio_dir, exist_ok=True)

    trimmed_path = os.path.join(audio_dir, "audio_input_5s.wav")
    # Fall back to 16 kHz when the global args carries no sample_rate.
    sr = getattr(args, "sample_rate", 16000)

    # Load only the first `limit_seconds` as mono at the target sample rate
    y, _ = librosa.load(audio_path, sr=sr, mono=True, duration=limit_seconds)

    # Save as 16-bit PCM mono WAV
    waveform = torch.from_numpy(y).unsqueeze(0)  # [1, num_samples]
    torchaudio.save(
        trimmed_path,
        waveform,
        sr,
        encoding="PCM_S",
        bits_per_sample=16,
        format="wav",
    )

    return trimmed_path
762
+
763
 
764
  css = """
765
  #col-container {
 
810
 
811
  image_input = gr.Image(label="Reference Image", type="filepath", height=512)
812
  audio_input = ExtendedAudio(label="Input Audio", type="filepath", options=["EMPTY"], show_download_button=True)
813
+ gr.Markdown("*A 5-second limit is applied to audio files to shorten generation time. You can turn this off in Advanced Settings*")
814
 
815
 
816
  with gr.Column():
 
820
 
821
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
822
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
823
+ with gr.Accordion("Advanced Settings", open=False):
824
+ limit_on = gr.Checkbox(label="Limit Audio files to 5 seconds", value=True)
825
+ adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
826
+ text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
827
 
828
  with gr.Column():
829
 
 
926
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
927
  num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
928
  adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
929
+ audio_input.upload(
930
+ fn=preprocess_audio_first_5s_librosa,
931
+ inputs=[audio_input, limit_on, session_state],
932
+ outputs=[audio_input],
933
+ )
934
 
935
  if __name__ == "__main__":
936
  demo.unload(cleanup)