import logging
import os
from typing import BinaryIO

import hydra
import wandb
from dotenv import load_dotenv
from omegaconf import DictConfig
from openai import OpenAI

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")


def speech_to_text(audio: BinaryIO, openai_client: OpenAI, configuration: dict) -> str:
    """Generate a text transcription of an audio file using the OpenAI API.

    Args:
        audio (BinaryIO): audio file object, opened in binary mode, containing the query
        openai_client (OpenAI): client for the OpenAI connection
        configuration (dict): transcription parameters (model, language,
            response_format, temperature)

    Returns:
        str: transcription text, or an error message on failure
    """
    try:
        transcription = openai_client.audio.transcriptions.create(
            model=configuration["model"],
            file=audio,
            language=configuration["language"],
            response_format=configuration["response_format"],
            temperature=configuration["temperature"],
        )
        logging.info("Success: audio converted into text!")
        logging.info(f"Audio transcription: {transcription}")
        return transcription
    except FileNotFoundError as e:
        logging.error(f"Error: not found - {str(e)}")
        return f"error {str(e)}"
    except Exception as e:
        logging.error(f"Error: OpenAI API request failed - {str(e)}")
        return f"error {str(e)}"


# Hydra expects the config name without the .yaml extension.
@hydra.main(config_path="../../conf", config_name="speech_to_text")
def speech_to_text_on_wandb(cfg: DictConfig):
    openai_client = OpenAI()
    run = wandb.init(
        project=cfg.main.project_name,
        group=cfg.main.experiment_name,
        config=cfg.openai_parameters,
        job_type="train_llm",
    )

    # Download the audio dataset artifact from W&B.
    artifact = run.use_artifact(
        os.path.join("mpoliti08/lux-voice-processing", cfg.main.audio_dataset),
        type="audio",
    )
    artifact_dir = artifact.download()

    # Transcribe each audio file and log audio/transcript pairs to a W&B table.
    table = wandb.Table(columns=["audio_file", "transcript"])
    for filename in os.listdir(artifact_dir):
        file_path = os.path.join(artifact_dir, filename)
        with open(file_path, "rb") as audio:
            transcription_text = speech_to_text(
                audio=audio,
                openai_client=openai_client,
                configuration=cfg.openai_parameters,
            )
        audio_file = wandb.Audio(file_path)
        table.add_data(audio_file, transcription_text)

    run.log({"Table": table})
    run.finish()


if __name__ == "__main__":
    openai_client = OpenAI(api_key=api_key)
    audio_path = "data/audio_recordings/0.wav"
    configuration = {
        "language": "it",
        "model": "whisper-1",
        "response_format": "text",
        "temperature": 0.2,
    }
    with open(audio_path, "rb") as audio:
        res = speech_to_text(
            audio=audio, openai_client=openai_client, configuration=configuration
        )
    print(res)
    # speech_to_text_on_wandb()
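
# A minimal sketch of the Hydra config this script expects at
# ../../conf/speech_to_text.yaml, inferred only from the keys accessed
# above (cfg.main.* and cfg.openai_parameters.*). The values shown are
# illustrative placeholders, not the project's actual configuration;
# the artifact name in particular is hypothetical.
#
# main:
#   project_name: lux-voice-processing
#   experiment_name: speech_to_text
#   audio_dataset: audio_recordings:latest  # hypothetical artifact name
# openai_parameters:
#   model: whisper-1
#   language: it
#   response_format: text
#   temperature: 0.2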
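
# Usage note: running the file as-is performs a local smoke test on
# data/audio_recordings/0.wav. To run the W&B pipeline instead, comment
# out the local block and call speech_to_text_on_wandb(); Hydra then
# accepts standard command-line overrides, e.g.
#   python speech_to_text.py openai_parameters.temperature=0
# (the override path assumes the config sketch above).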