leofltt committed on
Commit
9ff0018
·
verified ·
1 Parent(s): 3999b52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -8,8 +8,6 @@ from transformers import BarkModel, BarkProcessor
8
 
9
  from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
10
 
11
- SAMPLE_RATE = 16000
12
-
13
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
14
 
15
  asr_model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")
@@ -20,7 +18,7 @@ bark_processor = BarkProcessor.from_pretrained("suno/bark")
20
 
21
 
22
  def translate(audio):
23
- inputs = asr_processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
24
  generated_ids = asr_model.generate(inputs["input_features"],attention_mask=inputs["attention_mask"],
25
  forced_bos_token_id=asr_processor.tokenizer.lang_code_to_id["it"],)
26
  translation = asr_processor.batch_decode(generated_ids, skip_special_tokens=True)
@@ -37,7 +35,7 @@ def speech_to_speech_translation(audio):
37
  translated_text = translate(audio)
38
  synthesised_speech = synthesise(translated_text)
39
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
40
- return SAMPLE_RATE, synthesised_speech
41
 
42
 
43
  title = "Cascaded STST"
 
8
 
9
  from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
10
 
 
 
11
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
 
13
  asr_model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-mustc-multilingual-st")
 
18
 
19
 
20
  def translate(audio):
21
+ inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt")
22
  generated_ids = asr_model.generate(inputs["input_features"],attention_mask=inputs["attention_mask"],
23
  forced_bos_token_id=asr_processor.tokenizer.lang_code_to_id["it"],)
24
  translation = asr_processor.batch_decode(generated_ids, skip_special_tokens=True)
 
35
  translated_text = translate(audio)
36
  synthesised_speech = synthesise(translated_text)
37
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
38
+ return 16000, synthesised_speech
39
 
40
 
41
  title = "Cascaded STST"