alibabasglab committed on
Commit
469b4a9
·
verified ·
1 Parent(s): 963e3bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -100,8 +100,8 @@ se_demo = gr.Interface(
100
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Enhancement",
101
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
102
  "To try it, simply upload your audio, or click one of the examples. "),
103
- article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
104
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
105
  examples = [
106
  ["examples/mandarin_speech_16kHz.wav", "16000 Hz"],
107
  ["examples/english_speech_48kHz.wav", "48000 Hz"],
@@ -121,8 +121,8 @@ ss_demo = gr.Interface(
121
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Separation",
122
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is powered by AI and separates individual speech from mixed audio. It supports 16 kHz and two output streams. "
123
  "To try it, simply upload your audio, or click one of the examples. "),
124
- article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> </p>"
125
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
126
  examples = [
127
  ['examples/female_female_speech.wav'],
128
  ['examples/female_male_speech.wav'],
@@ -141,8 +141,8 @@ tse_demo = gr.Interface(
141
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Audio-Visual Speaker Extraction",
142
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts each speaker's voice from a multi-speaker video using facial recognition. "
143
  "To try it, simply upload your video, or click one of the examples. "),
144
- # article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
145
- # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
146
  examples = [
147
  ['examples/001.mp4'],
148
  ['examples/002.mp4'],
@@ -162,8 +162,9 @@ sr_demo = gr.Interface(
162
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Super Resolution",
163
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
164
  "To try it, simply upload your audio, or click one of the examples. "),
165
- article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
166
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"),
 
167
  examples = [
168
  ["examples/mandarin_speech_16kHz.wav", True],
169
  ["examples/LJSpeech-001-0001-22k.wav", True],
 
100
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Enhancement",
101
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
102
  "To try it, simply upload your audio, or click one of the examples. "),
103
+ article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
104
+ "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"),
105
  examples = [
106
  ["examples/mandarin_speech_16kHz.wav", "16000 Hz"],
107
  ["examples/english_speech_48kHz.wav", "48000 Hz"],
 
121
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Separation",
122
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is powered by AI and separates individual speech from mixed audio. It supports 16 kHz and two output streams. "
123
  "To try it, simply upload your audio, or click one of the examples. "),
124
+ article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions (ICASSP 2023)</a> </p>"
125
+ "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"),
126
  examples = [
127
  ['examples/female_female_speech.wav'],
128
  ['examples/female_male_speech.wav'],
 
141
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Audio-Visual Speaker Extraction",
142
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and extracts each speaker's voice from a multi-speaker video using facial recognition. "
143
  "To try it, simply upload your video, or click one of the examples. "),
144
+ # article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions (ICASSP 2023)</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
145
+ # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
146
  examples = [
147
  ['examples/001.mp4'],
148
  ['examples/002.mp4'],
 
162
  title = "<a href='https://github.com/modelscope/ClearerVoice-Studio' target='_blank'>ClearVoice<a/>: Speech Super Resolution",
163
  description = ("ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
164
  "To try it, simply upload your audio, or click one of the examples. "),
165
+ article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
166
+ "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"
167
+ "<p style='text-align: center'><a href='https://arxiv.org/abs/2501.10045' target='_blank'>HiFi-SR: A Unified Generative Transformer-Convolutional Adversarial Network for High-Fidelity Speech Super-Resolution (ICASSP 2025)</a> </p>"),
168
  examples = [
169
  ["examples/mandarin_speech_16kHz.wav", True],
170
  ["examples/LJSpeech-001-0001-22k.wav", True],