dlflannery committed on
Commit e00b48e · verified · 1 Parent(s): f30621f

Update app.py

Chunking spoken audio
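
In outline, the new code replaces the single speak_file / words_out / index state with a producer/consumer pipeline: the reply text is normalized and split into sentence-sized chunks, the first chunk is synthesized and returned right away, and a daemon thread renders the remaining chunks to temporary files whose names are posted on one queue for playback and on a second queue for later deletion. Below is a minimal sketch of that pattern, with a hypothetical synthesize() standing in for the client.audio.speech.create() call used in app.py:

import queue
import threading
from tempfile import NamedTemporaryFile

qspeech = queue.Queue()   # finished audio files, in speaking order
qdelete = queue.Queue()   # files for clean_up() to remove later

def synthesize(chunk):
    # Hypothetical stand-in for client.audio.speech.create(...).content in app.py.
    return chunk.encode()

def speech_worker(chunks):
    # Background producer: render each remaining chunk and queue its file name.
    for chunk in chunks:
        tmp = NamedTemporaryFile(mode='wb', delete=False)
        tmp.write(synthesize(chunk))
        tmp.close()
        qdelete.put(tmp.name)
        qspeech.put(tmp.name)

chunks = ["First sentence", "Second sentence", "Third sentence"]
first_file = NamedTemporaryFile(mode='wb', delete=False)   # chunk 0 is spoken immediately
first_file.write(synthesize(chunks[0]))
first_file.close()
threading.Thread(target=speech_worker, daemon=True, args=(chunks[1:],)).start()

# Consumer side (gen_output_audio in the commit): wait briefly for the next file.
try:
    next_file = qspeech.get(timeout=5)
except queue.Empty:
    next_file = None   # nothing ready; caller returns gr.Audio(sources=None)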

Files changed (1)
  1. app.py +93 -27
app.py CHANGED
@@ -1,11 +1,17 @@
 import os
+from re import L
+import tempfile
 import gradio as gr
 # import openai
+from numpy._core.defchararray import isdecimal
 from openai import OpenAI
 from dotenv import load_dotenv
 from pathlib import Path
 from time import sleep
 import audioread
+import queue
+import threading
+from tempfile import NamedTemporaryFile
 
 load_dotenv(override=True)
 key = os.getenv('OPENAI_API_KEY')
@@ -24,12 +30,16 @@ else:
     dp.mkdir(exist_ok=True)
     dataDir = '/data/'
 
-speak_file = dataDir + "speek.wav"
+#speak_file = dataDir + "speek.wav"
 
 client = OpenAI(api_key = key)
 
-words_out = []
-index = 0
+qspeech = queue.Queue()
+qdelete = queue.Queue()
+
+#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
+
+abbrevs = {'St. ' : 'Saint ', 'Mr. ': 'mister ', 'Mrs. ':'mussus ', 'Mr. ':'mister ', 'Ms. ':'mizz '}
 
 def genUsageStats(do_reset=False):
     result = []
@@ -195,39 +205,95 @@ def set_speak_button(txt):
     return gr.Button(visible=vis)
 
 def clean_up():
-    global words_out, index
-    if os.path.exists(speak_file):
-        os.remove(speak_file)
-    words_out=[]
-    index=0
+    while not qdelete.empty():
+        fname = qdelete.get()
+        if os.path.exists(fname):
+            os.remove(fname)
+
+def speech_worker(chunks=[]):
+    for chunk in chunks:
+        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85)
+        tempFile = NamedTemporaryFile(mode='wb', delete=False)
+        tempFile.write(response.content)
+        tempFile.close()
+        qdelete.put(tempFile.name)
+        qspeech.put(tempFile.name)
 
 
 with gr.Blocks() as demo:
     def initial_audio_output(txt):
-        global words_out, index
-        index = 0
-        words_out = txt.strip(' .').split('.')
-        chunk = words_out[0]
+        global digits
+        global abbrevs
+        while not qspeech.empty():
+            dud = qspeech.get()
+        for s,x in abbrevs.items():
+            txt = txt.replace(s, x)
+        words_in = txt.replace('**', '').splitlines(False)
+        words_out = []
+        for s in words_in:
+            s = s.lstrip('- *@#$%^&_=+-')
+            if len(s) > 0:
+                loc = s.index(' ')
+                if loc > 1:
+                    val = s[0:loc]
+                    isnum = val.replace('.','0').isdecimal()
+                    if isnum:
+                        if val.endswith('.'):
+                            val = val[:-1].replace('.',' point ') + '., '
+                        else:
+                            val = val.replace('.', ' point ') + ', '
+                        s = 'num'+ val + s[loc:]
+                words_out.append(s)
+        chunklist = []
+        for chunk in words_out:
+            if chunk.strip() == '':
+                continue
+            isnumbered = chunk.startswith('num')
+            number = ''
+            loc = 0
+            if isnumbered:
+                chunk = chunk[3:]
+                loc = chunk.index(',')
+                number = chunk[0:loc]
+                chunk = chunk[loc:]
+            locs = []
+            for i in range(1,len(chunk)-1):
+                (a, b, c) = chunk[i-1:i+2]
+                if a.isdecimal() and b == '.' and c.isdecimal():
+                    locs.append(i)
+            for i in locs:
+                chunk = chunk[:i] + ' point ' + chunk[i+1:]
+            if len(chunk) > 50:
+                finechunks = chunk.split('.')
+                for fchunk in finechunks:
+                    if isnumbered:
+                        fchunk = number + fchunk
+                        isnumbered = False
+                    if len(fchunk) > 0:
+                        if fchunk != '"':
+                            chunklist.append(fchunk)
+            else:
+                line = number + chunk
+                if line != '"':
+                    chunklist.append(line)
+        chunk = chunklist[0]
         if chunk.strip() == '':
             return gr.Audio(sources=None)
-        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.8)
-        index += 1
-        with open(speak_file, 'wb') as fp:
-            fp.write(response.content)
-        return speak_file
+        if len(chunklist) > 1:
+            threading.Thread(target=speech_worker, daemon=True, args=(chunklist[1:],)).start()
+        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85)
+        tempFile = NamedTemporaryFile(mode='wb', delete=False)
+        tempFile.write(response.content)
+        tempFile.close()
+        qdelete.put(tempFile.name)
+        return tempFile.name
 
     def gen_output_audio():
-        global words_out, index
-        if index >= len(words_out):
-            return gr.Audio(sources=None)
-        chunk = words_out[index]
-        if chunk.strip() == '':
+        try:
+            fname = qspeech.get(timeout=5)
+        except:
             return gr.Audio(sources=None)
+        return fname
 
     history = gr.State([])
     password = gr.State("")
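
For reference, the number handling added to initial_audio_output finds each '.' that sits between two digits and splices in the word ' point ', so the TTS voice reads "3.5" as "three point five" rather than pausing at a period. A condensed sketch of that step (the splices are applied right-to-left here so the recorded offsets stay valid when a line contains more than one decimal):

def spell_out_decimals(chunk):
    # Collect every '.' that appears between two digits, as in the diff above.
    locs = []
    for i in range(1, len(chunk) - 1):
        a, b, c = chunk[i - 1:i + 2]
        if a.isdecimal() and b == '.' and c.isdecimal():
            locs.append(i)
    # Replace each such '.' with ' point ', working backwards through the string.
    for i in reversed(locs):
        chunk = chunk[:i] + ' point ' + chunk[i + 1:]
    return chunk

print(spell_out_decimals("Version 3.5 raised accuracy to 92.4 percent."))
# -> "Version 3 point 5 raised accuracy to 92 point 4 percent."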