"""Speech-to-text helpers built on the OpenAI transcription API, with an
optional Weights & Biases pipeline for batch-transcribing an audio dataset."""
import logging
import os

import hydra
import wandb
from dotenv import load_dotenv
from omegaconf import DictConfig
from openai import OpenAI

# Load OPENAI_API_KEY (and any other settings) from a local .env file so the
# OpenAI client can authenticate without hard-coded credentials.
load_dotenv()
# NOTE(review): api_key is never passed explicitly below — OpenAI() reads
# OPENAI_API_KEY from the environment itself. Kept for backward compatibility.
api_key = os.getenv("OPENAI_API_KEY")
def speech_to_text(audio: bytes, openai_client: OpenAI, configuration: dict) -> str:
    """Transcribe an audio recording to text with the OpenAI transcription API.

    Args:
        audio (bytes): audio content (or an open binary file object) holding
            the spoken query; passed straight through as the ``file`` argument.
        openai_client (OpenAI): client for the OpenAI connection.
        configuration (dict): transcription parameters; must provide the keys
            "model", "language", "response_format" and "temperature".

    Returns:
        str: the transcription text on success, or a string of the form
        ``"error <message>"`` when the request fails.
    """
    try:
        transcription = openai_client.audio.transcriptions.create(
            model=configuration["model"],
            file=audio,
            language=configuration["language"],
            response_format=configuration["response_format"],
            temperature=configuration["temperature"],
        )
        logging.info("Success: audio converted into text!")
        logging.info(f"Audio transcription: {transcription}")
        return transcription
    except FileNotFoundError as e:
        # The original branch had a stray `pass` and fell through, returning
        # None implicitly; return the same "error ..." shape as the generic
        # handler so callers always get a string back.
        logging.error(f"Error: not found - {str(e)}")
        return f"error {str(e)}"
    except Exception as e:
        logging.error(f"Error: OpenAI API request failed - {str(e)}")
        return f"error {str(e)}"
def speech_to_text_on_wandb(cfg: DictConfig):
    """Transcribe every file of a W&B audio artifact and log the results.

    Downloads the artifact named by ``cfg.main.audio_dataset`` from the
    ``mpoliti08/lux-voice-processing`` project, transcribes each file with
    :func:`speech_to_text`, and logs an (audio, transcript) table to the run.

    Args:
        cfg (DictConfig): Hydra configuration; reads ``cfg.main.project_name``,
            ``cfg.main.experiment_name``, ``cfg.main.audio_dataset`` and
            ``cfg.openai_parameters``.
    """
    openai_client = OpenAI()
    run = wandb.init(
        project=cfg.main.project_name,
        group=cfg.main.experiment_name,
        config=cfg.openai_parameters,
        job_type="train_llm",
    )
    # Download the audio dataset artifact tracked on W&B.
    artifact = run.use_artifact(
        os.path.join("mpoliti08/lux-voice-processing", cfg.main.audio_dataset),
        type="audio",
    )
    artifact_dir = artifact.download()
    table = wandb.Table(columns=["audio_file", "transcript"])
    for filename in os.listdir(artifact_dir):
        file_path = os.path.join(artifact_dir, filename)
        # Context manager closes each handle promptly — the original leaked
        # one open file descriptor per audio file.
        with open(file_path, "rb") as audio:
            transcription_text = speech_to_text(
                audio=audio,
                openai_client=openai_client,
                configuration=cfg.openai_parameters,
            )
        audio_file = wandb.Audio(file_path)
        table.add_data(audio_file, transcription_text)
    run.log({"Table": table})
    run.finish()
if __name__ == "__main__":
    # Smoke-test the transcription helper on a local recording.
    openai_client = OpenAI()
    audio_path = "data/audio_recordings/0.wav"
    configuration = {
        "language": "it",
        "model": "whisper-1",
        "response_format": "text",
        "temperature": 0.2,
    }
    # Reuse audio_path (the literal was previously duplicated) and close the
    # file deterministically with a context manager.
    with open(audio_path, "rb") as audio:
        res = speech_to_text(
            audio=audio, openai_client=openai_client, configuration=configuration
        )
    print(res)
    # Alternative entry point (needs a Hydra config): speech_to_text_on_wandb(cfg)