dlflannery committed on
Commit e00b48e · verified · 1 Parent(s): f30621f

Update app.py

Chunking spoken audio
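
In outline, the new code replaces the single speak_file / words_out / index state with a producer/consumer pipeline: the reply text is normalized and split into sentence-sized chunks, the first chunk is synthesized and returned right away, and a daemon thread renders the remaining chunks to temporary files whose names are posted on one queue for playback and on a second queue for later deletion. Below is a minimal sketch of that pattern, with a hypothetical synthesize() standing in for the client.audio.speech.create() call used in app.py:

import queue
import threading
from tempfile import NamedTemporaryFile

qspeech = queue.Queue()   # finished audio files, in speaking order
qdelete = queue.Queue()   # files for clean_up() to remove later

def synthesize(chunk):
    # Hypothetical stand-in for client.audio.speech.create(...).content in app.py.
    return chunk.encode()

def speech_worker(chunks):
    # Background producer: render each remaining chunk and queue its file name.
    for chunk in chunks:
        tmp = NamedTemporaryFile(mode='wb', delete=False)
        tmp.write(synthesize(chunk))
        tmp.close()
        qdelete.put(tmp.name)
        qspeech.put(tmp.name)

chunks = ["First sentence", "Second sentence", "Third sentence"]
first_file = NamedTemporaryFile(mode='wb', delete=False)   # chunk 0 is spoken immediately
first_file.write(synthesize(chunks[0]))
first_file.close()
threading.Thread(target=speech_worker, daemon=True, args=(chunks[1:],)).start()

# Consumer side (gen_output_audio in the commit): wait briefly for the next file.
try:
    next_file = qspeech.get(timeout=5)
except queue.Empty:
    next_file = None   # nothing ready; caller returns gr.Audio(sources=None)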

Files changed (1)
  1. app.py +93 -27
app.py CHANGED
@@ -1,11 +1,17 @@
 import os
+from re import L
+import tempfile
 import gradio as gr
 # import openai
+from numpy._core.defchararray import isdecimal
 from openai import OpenAI
 from dotenv import load_dotenv
 from pathlib import Path
 from time import sleep
 import audioread
+import queue
+import threading
+from tempfile import NamedTemporaryFile
 
 load_dotenv(override=True)
 key = os.getenv('OPENAI_API_KEY')
@@ -24,12 +30,16 @@ else:
     dp.mkdir(exist_ok=True)
     dataDir = '/data/'
 
-speak_file = dataDir + "speek.wav"
+#speak_file = dataDir + "speek.wav"
 
 client = OpenAI(api_key = key)
 
-words_out = []
-index = 0
+qspeech = queue.Queue()
+qdelete = queue.Queue()
+
+#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
+
+abbrevs = {'St. ' : 'Saint ', 'Mr. ': 'mister ', 'Mrs. ':'mussus ', 'Mr. ':'mister ', 'Ms. ':'mizz '}
 
 def genUsageStats(do_reset=False):
     result = []
@@ -195,39 +205,95 @@ def set_speak_button(txt):
     return gr.Button(visible=vis)
 
 def clean_up():
-    global words_out, index
-    if os.path.exists(speak_file):
-        os.remove(speak_file)
-    words_out=[]
-    index=0
+    while not qdelete.empty():
+        fname = qdelete.get()
+        if os.path.exists(fname):
+            os.remove(fname)
+
+def speech_worker(chunks=[]):
+    for chunk in chunks:
+        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85)
+        tempFile = NamedTemporaryFile(mode='wb', delete=False)
+        tempFile.write(response.content)
+        tempFile.close()
+        qdelete.put(tempFile.name)
+        qspeech.put(tempFile.name)
 
 
 with gr.Blocks() as demo:
     def initial_audio_output(txt):
-        global words_out, index
-        index = 0
-        words_out = txt.strip(' .').split('.')
-        chunk = words_out[0]
+        global digits
+        global abbrevs
+        while not qspeech.empty():
+            dud = qspeech.get()
+        for s,x in abbrevs.items():
+            txt = txt.replace(s, x)
+        words_in = txt.replace('**', '').splitlines(False)
+        words_out = []
+        for s in words_in:
+            s = s.lstrip('- *@#$%^&_=+-')
+            if len(s) > 0:
+                loc = s.index(' ')
+                if loc > 1:
+                    val = s[0:loc]
+                    isnum = val.replace('.','0').isdecimal()
+                    if isnum:
+                        if val.endswith('.'):
+                            val = val[:-1].replace('.',' point ') + '., '
+                        else:
+                            val = val.replace('.', ' point ') + ', '
+                        s = 'num'+ val + s[loc:]
+                words_out.append(s)
+        chunklist = []
+        for chunk in words_out:
+            if chunk.strip() == '':
+                continue
+            isnumbered = chunk.startswith('num')
+            number = ''
+            loc = 0
+            if isnumbered:
+                chunk = chunk[3:]
+                loc = chunk.index(',')
+                number = chunk[0:loc]
+                chunk = chunk[loc:]
+            locs = []
+            for i in range(1,len(chunk)-1):
+                (a, b, c) = chunk[i-1:i+2]
+                if a.isdecimal() and b == '.' and c.isdecimal():
+                    locs.append(i)
+            for i in locs:
+                chunk = chunk[:i] + ' point ' + chunk[i+1:]
+            if len(chunk) > 50:
+                finechunks = chunk.split('.')
+                for fchunk in finechunks:
+                    if isnumbered:
+                        fchunk = number + fchunk
+                        isnumbered = False
+                    if len(fchunk) > 0:
+                        if fchunk != '"':
+                            chunklist.append(fchunk)
+            else:
+                line = number + chunk
+                if line != '"':
+                    chunklist.append(line)
+        chunk = chunklist[0]
         if chunk.strip() == '':
             return gr.Audio(sources=None)
-        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.8)
-        index += 1
-        with open(speak_file, 'wb') as fp:
-            fp.write(response.content)
-        return speak_file
+        if len(chunklist) > 1:
+            threading.Thread(target=speech_worker, daemon=True, args=(chunklist[1:],)).start()
+        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85)
+        tempFile = NamedTemporaryFile(mode='wb', delete=False)
+        tempFile.write(response.content)
+        tempFile.close()
+        qdelete.put(tempFile.name)
+        return tempFile.name
 
     def gen_output_audio():
-        global words_out, index
-        if index >= len(words_out):
-            return gr.Audio(sources=None)
-        chunk = words_out[index]
-        if chunk.strip() == '':
+        try:
+            fname = qspeech.get(timeout=5)
+        except:
             return gr.Audio(sources=None)
+        return fname
 
     history = gr.State([])
     password = gr.State("")
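
For reference, the number handling added to initial_audio_output finds each '.' that sits between two digits and splices in the word ' point ', so the TTS voice reads "3.5" as "three point five" rather than pausing at a period. A condensed sketch of that step (the splices are applied right-to-left here so the recorded offsets stay valid when a line contains more than one decimal):

def spell_out_decimals(chunk):
    # Collect every '.' that appears between two digits, as in the diff above.
    locs = []
    for i in range(1, len(chunk) - 1):
        a, b, c = chunk[i - 1:i + 2]
        if a.isdecimal() and b == '.' and c.isdecimal():
            locs.append(i)
    # Replace each such '.' with ' point ', working backwards through the string.
    for i in reversed(locs):
        chunk = chunk[:i] + ' point ' + chunk[i + 1:]
    return chunk

print(spell_out_decimals("Version 3.5 raised accuracy to 92.4 percent."))
# -> "Version 3 point 5 raised accuracy to 92 point 4 percent."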