Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import subprocess | |
| import soundfile as sf | |
| from speech_recognition import AudioFile, Recognizer | |
# Japanese greeting phrases: polite and casual variants of "welcome",
# "hello" and "good morning". Kept as a plain list so existing callers can
# index, iterate, or extend it.
greeting_list = [
    "いらっしゃいませ",
    "いらっしゃい",
    "いらっしゃいませー",
    "こんにちは",
    "おはようございます",
    "おはよう",
    "おはよー",
    "おはー",
]
def ffmpeg_read(bpayload: bytes, sampling_rate: int, output_path: str = "temp.wav") -> str:
    """
    Decode an audio payload through ffmpeg and save it as a 16-bit PCM WAV file.

    The payload is piped to ffmpeg's stdin, converted to mono 32-bit float
    samples at ``sampling_rate``, and the decoded samples are then written to
    ``output_path`` with ``soundfile``.

    Args:
        bpayload: Raw bytes of the encoded audio file.
        sampling_rate: Target sampling rate in Hz for the decoded audio.
        output_path: Destination of the WAV file. Defaults to ``"temp.wav"``;
            each call overwrites the file, so callers that may run
            concurrently should pass distinct paths.

    Returns:
        The path of the written WAV file (``output_path``).

    Raises:
        ValueError: If the ffmpeg executable cannot be found, or if ffmpeg
            produced no audio samples for the given payload.
    """
    ar = f"{sampling_rate}"
    ac = "1"
    format_for_conversion = "f32le"
    ffmpeg_command = [
        "ffmpeg",
        "-i",
        "pipe:0",
        "-ac",
        ac,
        "-ar",
        ar,
        "-f",
        format_for_conversion,
        "-hide_banner",
        "-loglevel",
        "quiet",
        "pipe:1",
    ]

    try:
        ffmpeg_process = subprocess.Popen(
            ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
    except FileNotFoundError as error:
        raise ValueError(
            "ffmpeg was not found but is required to load audio files from filename"
        ) from error

    # communicate() feeds the payload, reads all of stdout and waits for exit.
    out_bytes, _ = ffmpeg_process.communicate(bpayload)
    audio = np.frombuffer(out_bytes, np.float32)

    # ffmpeg runs with "-loglevel quiet", so a decoding failure is only visible
    # as empty output; fail loudly instead of writing an empty WAV file.
    if audio.shape[0] == 0:
        raise ValueError("Malformed soundfile")

    sf.write(output_path, audio, sampling_rate, subtype='PCM_16')
    return output_path
def stt(audio: object, language='ja') -> str:
    """Transcribe recorded speech into text.

    Args:
        audio: recording of the user's speech, passed straight to
            ``AudioFile`` (e.g. the path of a WAV file)
        language (str): language code handed to the recognizer

    Returns:
        text (str): the transcription of the recording
    """
    recognizer = Recognizer()

    with AudioFile(audio) as wav_source:
        # Pull the whole recording into memory before recognition.
        recorded = recognizer.record(wav_source)
        # Recognition is delegated to Google's speech-to-text API.
        return recognizer.recognize_google(recorded, language=language)