import os
from openai import OpenAI
import logging
import hydra
from dotenv import load_dotenv
import wandb
from omegaconf import DictConfig

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
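# Note: OpenAI() picks up OPENAI_API_KEY from the environment by default, so the
# load_dotenv() call above is what makes authentication work; the api_key
# variable is read here but never passed to the client explicitly.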

def speech_to_text(audio, openai_client: OpenAI, configuration: dict) -> str:
    """Generate a text transcription of an audio file using the OpenAI API.

    Args:
        audio: audio file opened in binary mode (file-like object) containing the query
        openai_client (OpenAI): client for the OpenAI connection
        configuration (dict): transcription parameters (model, language,
            response_format, temperature)

    Returns:
        str: transcription text, or an error message if the request fails
    """
    try:
        transcription = openai_client.audio.transcriptions.create(
            model=configuration["model"],
            file=audio,
            language=configuration["language"],
            response_format=configuration["response_format"],
            temperature=configuration["temperature"],
        )
        logging.info("Success: audio converted into text!")
        logging.info(f"Audio transcription: {transcription}")
        return transcription
    except FileNotFoundError as e:
        logging.error(f"Error: not found - {str(e)}")
        return f"error {str(e)}"
    except Exception as e:
        logging.error(f"Error: OpenAI API request failed - {str(e)}")
        return f"error {str(e)}"

def speech_to_text_on_wandb(cfg: DictConfig):
    """Transcribe every audio file in a W&B artifact and log the results to a W&B table."""
    openai_client = OpenAI()
    run = wandb.init(
        project=cfg.main.project_name,
        group=cfg.main.experiment_name,
        config=cfg.openai_parameters,
        job_type="train_llm",
    )

    # download artifact
    artifact = run.use_artifact(
        os.path.join("mpoliti08/lux-voice-processing", cfg.main.audio_dataset),
        type="audio",
    )
    artifact_dir = artifact.download()

    table = wandb.Table(columns=["audio_file", "transcript"])
    for filename in os.listdir(artifact_dir):
        file_path = os.path.join(artifact_dir, filename)
        # Transcribe each downloaded recording and log it alongside its transcript
        with open(file_path, "rb") as audio:
            transcription_text = speech_to_text(
                audio=audio,
                openai_client=openai_client,
                configuration=cfg.openai_parameters,
            )
        audio_file = wandb.Audio(file_path)
        table.add_data(audio_file, transcription_text)

    run.log({"Table": table})
    run.finish()
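
# hydra and DictConfig are imported above, but no entry point is decorated. A
# minimal sketch of how speech_to_text_on_wandb could be wired up with Hydra is
# shown below; the config directory and file name ("conf", "config") are
# assumptions, not part of this script.
#
# @hydra.main(config_path="conf", config_name="config", version_base=None)
# def main(cfg: DictConfig) -> None:
#     speech_to_text_on_wandb(cfg)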

if __name__ == "__main__":
    # Example: transcribe a single local recording without W&B logging
    openai_client = OpenAI()
    audio_path = "data/audio_recordings/0.wav"
    configuration = {
        "language": "it",
        "model": "whisper-1",
        "response_format": "text",
        "temperature": 0.2,
    }
    with open(audio_path, "rb") as audio:
        res = speech_to_text(
            audio=audio, openai_client=openai_client, configuration=configuration
        )
    print(res)
    # speech_to_text_on_wandb()
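
# The Hydra config consumed by speech_to_text_on_wandb is expected to expose the
# keys read above (cfg.main.project_name, cfg.main.experiment_name,
# cfg.main.audio_dataset, cfg.openai_parameters). A hypothetical YAML layout,
# with placeholder values, might look like:
#
# main:
#   project_name: lux-voice-processing
#   experiment_name: whisper-transcription
#   audio_dataset: audio_dataset:latest
# openai_parameters:
#   model: whisper-1
#   language: it
#   response_format: text
#   temperature: 0.2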