alibabasglab committed on
Commit
963e3bf
·
verified ·
1 Parent(s): 6459fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -97,8 +97,8 @@ se_demo = gr.Interface(
97
  outputs = [
98
  gr.Audio(label="Output Audio", type="filepath"),
99
  ],
100
- title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Enhancement",
101
- description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
102
  "To try it, simply upload your audio, or click one of the examples. "),
103
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
104
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
@@ -118,8 +118,8 @@ ss_demo = gr.Interface(
118
  gr.Audio(label="Output Audio", type="filepath"),
119
  gr.Audio(label="Output Audio", type="filepath"),
120
  ],
121
- title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Separation",
122
- description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is powered by AI and separates individual speech from mixed audio. It supports 16 kHz and two output streams. "
123
  "To try it, simply upload your audio, or click one of the examples. "),
124
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> </p>"
125
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
@@ -138,8 +138,8 @@ tse_demo = gr.Interface(
138
  outputs = [
139
  gr.Gallery(label="Output Video List")
140
  ],
141
- title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Audio-Visual Speaker Extraction",
142
- description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and extracts each speaker's voice from a multi-speaker video using facial recognition. "
143
  "To try it, simply upload your video, or click one of the examples. "),
144
  # article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
145
  # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
@@ -159,8 +159,8 @@ sr_demo = gr.Interface(
159
  outputs = [
160
  gr.Audio(label="Output Audio", type="filepath"),
161
  ],
162
- title = "<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice<a/>: Speech Super Resolution",
163
- description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
164
  "To try it, simply upload your audio, or click one of the examples. "),
165
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
166
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 
97
  outputs = [
98
  gr.Audio(label="Output Audio", type="filepath"),
99
  ],
100
+ title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Enhancement",
101
+ description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
102
  "To try it, simply upload your audio, or click one of the examples. "),
103
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
104
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 
118
  gr.Audio(label="Output Audio", type="filepath"),
119
  gr.Audio(label="Output Audio", type="filepath"),
120
  ],
121
+ title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Separation",
122
+ description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is powered by AI and separates individual speech from mixed audio. It supports 16 kHz and two output streams. "
123
  "To try it, simply upload your audio, or click one of the examples. "),
124
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> </p>"
125
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 
138
  outputs = [
139
  gr.Gallery(label="Output Video List")
140
  ],
141
+ title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Audio-Visual Speaker Extraction",
142
+ description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts each speaker's voice from a multi-speaker video using facial recognition. "
143
  "To try it, simply upload your video, or click one of the examples. "),
144
  # article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
145
  # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
 
159
  outputs = [
160
  gr.Audio(label="Output Audio", type="filepath"),
161
  ],
162
+ title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Super Resolution",
163
+ description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
164
  "To try it, simply upload your audio, or click one of the examples. "),
165
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
166
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),