Spaces:
Runtime error
Runtime error
aar2dee2
committed on
Commit
·
5042d26
1
Parent(s):
ae363d1
convert numpy array to audio segment
Browse files- app.py +16 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
# # Import required libraries
|
2 |
|
|
|
|
|
3 |
import vocode
|
4 |
from vocode import getenv
|
5 |
import gradio as gr
|
@@ -66,6 +68,18 @@ logger = logging.getLogger(__name__)
|
|
66 |
logger.setLevel(logging.DEBUG)
|
67 |
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def main(input_audio):
|
70 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
71 |
logger.info(f"input_audio: {input_audio}")
|
@@ -88,7 +102,8 @@ def main(input_audio):
|
|
88 |
while True:
|
89 |
try:
|
90 |
# Transcribe the input_audio using WhisperTranscriber
|
91 |
-
|
|
|
92 |
logger.info(f"Transcription: {transcript}")
|
93 |
response = agent.generate_response(transcript)
|
94 |
logger.info(f"Agent response: {response}")
|
@@ -118,7 +133,6 @@ file_translate = gr.Interface(
|
|
118 |
fn=main,
|
119 |
inputs=gr.Audio(source="upload", type="filepath", format="wav"),
|
120 |
outputs=gr.Audio(label="Generated Speech", type="filepath", format="wav"),
|
121 |
-
examples=[["./data/example/strauss-oppenheimer.wav"]],
|
122 |
title=title,
|
123 |
description=description,
|
124 |
)
|
|
|
1 |
# # Import required libraries
|
2 |
|
3 |
+
import numpy as np
|
4 |
+
from pydub import AudioSegment
|
5 |
import vocode
|
6 |
from vocode import getenv
|
7 |
import gradio as gr
|
|
|
68 |
logger.setLevel(logging.DEBUG)
|
69 |
|
70 |
|
71 |
+
def convert_to_audio_segment(input_audio):
    """Convert a Gradio-style numpy audio tuple into a pydub AudioSegment.

    Parameters:
        input_audio: tuple of (sample_rate, audio_data) as produced by
            gr.Audio(type="numpy"). audio_data may be 1-D (mono) or 2-D
            (samples, channels) — presumably int16 or normalized float;
            verify against the Gradio version in use.

    Returns:
        AudioSegment: 16-bit PCM audio at the given sample rate.
    """
    sample_rate, audio_data = input_audio
    if np.issubdtype(audio_data.dtype, np.floating):
        # Float audio from Gradio is normalized to [-1, 1]; a bare astype
        # would truncate every sample to 0 or -1 (near-silence), so rescale
        # to the int16 range before converting.
        audio_data = (audio_data * 32767.0).astype(np.int16)
    else:
        audio_data = audio_data.astype(np.int16)  # convert to 16-bit data
    # Infer the channel count rather than assuming mono: a 2-D Gradio array
    # is (samples, channels), and tobytes() on the C-ordered array yields
    # the interleaved frame layout pydub expects.
    channels = 1 if audio_data.ndim == 1 else audio_data.shape[1]
    audio_segment = AudioSegment(
        audio_data.tobytes(),  # raw PCM bytes from the numpy array
        frame_rate=sample_rate,
        sample_width=audio_data.dtype.itemsize,  # 2 bytes for 16-bit audio
        channels=channels,
    )
    return audio_segment
|
81 |
+
|
82 |
+
|
83 |
def main(input_audio):
|
84 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
85 |
logger.info(f"input_audio: {input_audio}")
|
|
|
102 |
while True:
|
103 |
try:
|
104 |
# Transcribe the input_audio using WhisperTranscriber
|
105 |
+
input_audio_segment = convert_to_audio_segment(input_audio)
|
106 |
+
transcript = transcriber.transcribe(input_audio_segment)
|
107 |
logger.info(f"Transcription: {transcript}")
|
108 |
response = agent.generate_response(transcript)
|
109 |
logger.info(f"Agent response: {response}")
|
|
|
133 |
fn=main,
|
134 |
inputs=gr.Audio(source="upload", type="filepath", format="wav"),
|
135 |
outputs=gr.Audio(label="Generated Speech", type="filepath", format="wav"),
|
|
|
136 |
title=title,
|
137 |
description=description,
|
138 |
)
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
vocode[io]
|
2 |
google-cloud-texttospeech
|
3 |
-
gradio==3.43.2
|
|
|
|
1 |
vocode[io]
|
2 |
google-cloud-texttospeech
|
3 |
+
gradio==3.43.2
|
4 |
+
numpy
|