varl42 committed on
Commit
9bd3992
·
1 Parent(s): 4563676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -7,7 +7,6 @@ import scipy
7
  from gtts import gTTS
8
  from io import BytesIO
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
- from transformers import VitsTokenizer, VitsModel
11
 
12
 
13
  def extract_text(pdf_file):
@@ -61,15 +60,11 @@ def text_to_audio(text):
61
  #buffer.seek(0)
62
  #return buffer.read()
63
 
64
- tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
65
- model = VitsModel.from_pretrained("facebook/mms-tts-eng")
66
 
67
- inputs = tokenizer([text], return_tensors="pt")
68
 
69
- with torch.no_grad():
70
- outputs = model(**inputs)
71
-
72
- return outputs.waveform[0]
73
 
74
  def audio_pdf(pdf_file):
75
  text = extract_text(pdf_file)
 
7
  from gtts import gTTS
8
  from io import BytesIO
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
10
 
11
 
12
  def extract_text(pdf_file):
 
60
  #buffer.seek(0)
61
  #return buffer.read()
62
 
63
+ synthesiser = pipeline("text-to-speech", "suno/bark")
 
64
 
65
+ speech = synthesiser[str("summary"), forward_params={"do_sample": True}]
66
 
67
+ scipy.io.wavfile.write("speech.wav", rate=speech["sampling_rate"], data=speech["audio"])
 
 
 
68
 
69
  def audio_pdf(pdf_file):
70
  text = extract_text(pdf_file)