OpenSound committed on
Commit f2549d2 · verified · 1 parent: e6cf28b

Update app.py

Files changed (1)
app.py: +80 -56
app.py CHANGED
@@ -1,3 +1,48 @@
+Hugging Face's logo
+Hugging Face
+Search models, datasets, users...
+Models
+Datasets
+Spaces
+Posts
+Docs
+Enterprise
+Pricing
+
+
+
+Spaces:
+
+OpenSound
+/
+SSR-Speech
+
+private
+
+Logs
+App
+Files
+Community
+Settings
+SSR-Speech
+/
+app.py
+
+OpenSound's picture
+OpenSound
+Update app.py
+ce5a339
+verified
+27 minutes ago
+raw
+
+Copy download link
+history
+blame
+edit
+delete
+
+41.4 kB
 import os
 os.system("bash setup.sh")
 import requests
@@ -692,15 +737,29 @@ if __name__ == "__main__":
 
 with gr.Row():
 with gr.Column(scale=2):
-input_audio = gr.Audio(value=f"{DEMO_PATH}/84_121550_000074_000000.wav", label="Input Audio", type="filepath", interactive=True)
+input_audio = gr.Audio(
+value=f"{DEMO_PATH}/84_121550_000074_000000.wav",
+label="Input Audio",
+type="filepath",
+interactive=True
+)
 with gr.Group():
-original_transcript = gr.Textbox(label="Original transcript", lines=5, value="but when I had approached so near to them the common object, which the sense deceives, lost not by distance any of its marks.",
-info="Use whisperx model to get the transcript.")
+original_transcript = gr.Textbox(
+label="Original transcript",
+lines=5,
+value="but when I had approached so near to them the common object, which the sense deceives, lost not by distance any of its marks.",
+info="Use whisperx model to get the transcript."
+)
 transcribe_btn = gr.Button(value="Transcribe")
 
 with gr.Column(scale=3):
 with gr.Group():
-transcript = gr.Textbox(label="Text", lines=7, value="but when I saw the mirage of the lake in the distance, which the sense deceives, lost not by distance any of its marks.", interactive=True)
+transcript = gr.Textbox(
+label="Text",
+lines=7,
+value="but when I saw the mirage of the lake in the distance, which the sense deceives, lost not by distance any of its marks.",
+interactive=True
+)
 run_btn = gr.Button(value="Run")
 
 with gr.Column(scale=2):
@@ -720,26 +779,18 @@ if __name__ == "__main__":
 sub_amount = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
 success_output = gr.HTML()
-
-semgents = gr.State() # not used
-state = gr.State() # not used
 
-audio_state = gr.State(value=f"{DEMO_PATH}/84_121550_000074_000000.wav")
-input_audio.change(
-lambda audio: audio,
+transcribe_btn.click(
+fn=transcribe_en,
 inputs=[input_audio],
-outputs=[audio_state]
+outputs=[original_transcript, gr.State(), gr.State(), success_output]
 )
 
-transcribe_btn.click(fn=transcribe_en,
-inputs=[audio_state],
-outputs=[original_transcript, semgents, state, success_output])
-
 run_btn.click(fn=run_edit_en,
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output])
 
@@ -747,7 +798,7 @@ if __name__ == "__main__":
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output]
 )
@@ -784,25 +835,16 @@ if __name__ == "__main__":
 sub_amount = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
 success_output = gr.HTML()
-
-semgents = gr.State() # not used
-state = gr.State() # not used
-audio_state = gr.State(value=f"{DEMO_PATH}/84_121550_000074_000000.wav")
-input_audio.change(
-lambda audio: audio,
-inputs=[input_audio],
-outputs=[audio_state]
-)
 
 transcribe_btn.click(fn=transcribe_en,
-inputs=[audio_state],
-outputs=[original_transcript, semgents, state, success_output])
+inputs=[input_audio],
+outputs=[original_transcript, gr.State(), gr.State(), success_output])
 
 run_btn.click(fn=run_tts_en,
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output])
 
@@ -810,7 +852,7 @@ if __name__ == "__main__":
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output]
 )
@@ -847,25 +889,16 @@ if __name__ == "__main__":
 sub_amount = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
 success_output = gr.HTML()
-
-semgents = gr.State() # not used
-state = gr.State() # not used
-audio_state = gr.State(value=f"{DEMO_PATH}/aishell3_test.wav")
-input_audio.change(
-lambda audio: audio,
-inputs=[input_audio],
-outputs=[audio_state]
-)
 
 transcribe_btn.click(fn=transcribe_zh,
-inputs=[audio_state],
-outputs=[original_transcript, semgents, state, success_output])
+inputs=[input_audio],
+outputs=[original_transcript, gr.State(), gr.State(), success_output])
 
 run_btn.click(fn=run_edit_zh,
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output])
 
@@ -873,7 +906,7 @@ if __name__ == "__main__":
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output]
 )
@@ -910,25 +943,16 @@ if __name__ == "__main__":
 sub_amount = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
 success_output = gr.HTML()
-
-semgents = gr.State() # not used
-state = gr.State() # not used
-audio_state = gr.State(value=f"{DEMO_PATH}/aishell3_test.wav")
-input_audio.change(
-lambda audio: audio,
-inputs=[input_audio],
-outputs=[audio_state]
-)
 
 transcribe_btn.click(fn=transcribe_zh,
-inputs=[audio_state],
-outputs=[original_transcript, semgents, state, success_output])
+inputs=[input_audio],
+outputs=[original_transcript, gr.State(), gr.State(), success_output])
 
 run_btn.click(fn=run_tts_zh,
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output])
 
@@ -936,7 +960,7 @@ if __name__ == "__main__":
 inputs=[
 seed, sub_amount,
 aug_text, cfg_coef, cfg_stride, prompt_length,
-audio_state, original_transcript, transcript,
+input_audio, original_transcript, transcript,
 ],
 outputs=[output_audio, success_output]
 )
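
For reference, a minimal sketch of the wiring pattern this diff adopts: the gr.Audio component is passed straight into the click handlers' inputs instead of being mirrored through a gr.State via input_audio.change, and inline gr.State() entries in the outputs list absorb the transcriber's unused return values. The fake_transcribe and fake_run_edit stubs below are hypothetical stand-ins for the app's transcribe_en/transcribe_zh and run_edit_*/run_tts_* functions; everything else follows the Gradio calls shown in the diff.

# Minimal sketch of the direct-wiring pattern, with stub callbacks.
import gradio as gr

def fake_transcribe(audio_path):
    # Stand-in for transcribe_en: returns (transcript, segments, state, status_html);
    # only the transcript and the status message are kept by the UI.
    return f"transcript of {audio_path}", None, None, "<p>Done</p>"

def fake_run_edit(audio_path, original_transcript, target_transcript):
    # Stand-in for run_edit_en: would synthesize edited audio from the inputs.
    return audio_path, "<p>Edited</p>"

with gr.Blocks() as demo:
    input_audio = gr.Audio(label="Input Audio", type="filepath", interactive=True)
    original_transcript = gr.Textbox(label="Original transcript", lines=5)
    transcript = gr.Textbox(label="Text", lines=7, interactive=True)
    transcribe_btn = gr.Button("Transcribe")
    run_btn = gr.Button("Run")
    output_audio = gr.Audio(label="Output Audio")
    success_output = gr.HTML()

    # The audio component itself is an input; no gr.State mirror or
    # input_audio.change() handler is needed to track the uploaded file path.
    transcribe_btn.click(
        fn=fake_transcribe,
        inputs=[input_audio],
        # Inline gr.State() slots discard the unused segments/state return values.
        outputs=[original_transcript, gr.State(), gr.State(), success_output],
    )
    run_btn.click(
        fn=fake_run_edit,
        inputs=[input_audio, original_transcript, transcript],
        outputs=[output_audio, success_output],
    )

demo.launch()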