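Update app.py: comment out the CUDA device moves (`model.to('cuda')`, `audio_tokens.cpu()`, `processed_inputs.to("cuda")`) so the model and inputs are no longer moved to the GPU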
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import torchaudio
|
|
| 5 |
from transformers import AutoProcessor, SeamlessM4TModel
|
| 6 |
processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
|
| 7 |
model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
|
| 8 |
-
model.to('cuda')
|
| 9 |
|
| 10 |
language_dict = {
|
| 11 |
"Modern Standard Arabic" : "arb",
|
|
@@ -57,14 +57,14 @@ def png(source_lang,target_lang,audio,text):
|
|
| 57 |
processed_inputs = processor(text, src_lang=source_lang_code, return_tensors="pt")
|
| 58 |
else:
|
| 59 |
sample_rate, audio_data = audio
|
| 60 |
-
audio_tokens = torch.from_numpy(audio_data)
|
| 61 |
audio_tokens = audio_tokens.to(torch.float32)
|
| 62 |
audio_tokens = torchaudio.functional.resample(audio_tokens, orig_freq=sample_rate, new_freq=16_000)
|
| 63 |
-
audio_tokens = audio_tokens.cpu()
|
| 64 |
processed_inputs = processor(audios=audio_tokens, sampling_rate=16000, return_tensors="pt")
|
| 65 |
|
| 66 |
|
| 67 |
-
processed_inputs = processed_inputs.to("cuda")
|
| 68 |
generated_audio = model.generate(**processed_inputs, tgt_lang=target_lang_code)[0].cpu().numpy().squeeze()
|
| 69 |
output_tokens = model.generate(**processed_inputs, tgt_lang=target_lang_code, generate_speech=False)
|
| 70 |
generated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
|
|
|
|
| 5 |
from transformers import AutoProcessor, SeamlessM4TModel
|
| 6 |
processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
|
| 7 |
model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
|
| 8 |
+
# model.to('cuda')
|
| 9 |
|
| 10 |
language_dict = {
|
| 11 |
"Modern Standard Arabic" : "arb",
|
|
|
|
| 57 |
processed_inputs = processor(text, src_lang=source_lang_code, return_tensors="pt")
|
| 58 |
else:
|
| 59 |
sample_rate, audio_data = audio
|
| 60 |
+
audio_tokens = torch.from_numpy(audio_data) #.to(torch.device("cuda"))
|
| 61 |
audio_tokens = audio_tokens.to(torch.float32)
|
| 62 |
audio_tokens = torchaudio.functional.resample(audio_tokens, orig_freq=sample_rate, new_freq=16_000)
|
| 63 |
+
# audio_tokens = audio_tokens.cpu()
|
| 64 |
processed_inputs = processor(audios=audio_tokens, sampling_rate=16000, return_tensors="pt")
|
| 65 |
|
| 66 |
|
| 67 |
+
# processed_inputs = processed_inputs.to("cuda")
|
| 68 |
generated_audio = model.generate(**processed_inputs, tgt_lang=target_lang_code)[0].cpu().numpy().squeeze()
|
| 69 |
output_tokens = model.generate(**processed_inputs, tgt_lang=target_lang_code, generate_speech=False)
|
| 70 |
generated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
|