# fix dockerfile (commit 9da994b)
import os
from openai import OpenAI
import logging
import hydra
from dotenv import load_dotenv
import wandb
from omegaconf import DictConfig
# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): `api_key` is not used anywhere in this file — the OpenAI()
# client reads OPENAI_API_KEY from the environment itself; verify it's needed.
api_key = os.getenv("OPENAI_API_KEY")
def speech_to_text(audio: bytes, openai_client: OpenAI, configuration: dict) -> str:
    """Transcribe audio content to text using the OpenAI transcription API.

    Args:
        audio (bytes): audio content (or an open binary file handle) containing
            the query
        openai_client (OpenAI): client for the OpenAI connection
        configuration (dict): must provide the "model", "language",
            "response_format" and "temperature" keys

    Returns:
        str: transcription text on success, or an "error ..." string on failure
    """
    try:
        transcription = openai_client.audio.transcriptions.create(
            model=configuration["model"],
            file=audio,
            language=configuration["language"],
            response_format=configuration["response_format"],
            temperature=configuration["temperature"],
        )
        logging.info("Success: audio converted into text!")
        logging.info(f"Audio transcription: {transcription}")
        return transcription
    except FileNotFoundError as e:
        # Fix: this branch used to hit a bare `pass` and fall off the end of
        # the function, silently returning None. Now it logs and returns an
        # error string, consistent with the generic handler below.
        logging.error(f"Error: not found - {str(e)}")
        return f"error {str(e)}"
    except Exception as e:
        logging.error(f"Error: OpenAI API request failed - {str(e)}")
        return f"error {str(e)}"
@hydra.main(config_path="../../conf", config_name="speech_to_text.yaml")
def speech_to_text_on_wandb(cfg: DictConfig):
    """Transcribe every audio file in a W&B artifact and log the results.

    Downloads the configured audio dataset artifact, runs `speech_to_text`
    on each file in it, and logs an (audio, transcript) table to the run.

    Args:
        cfg (DictConfig): hydra config; reads `cfg.main.project_name`,
            `cfg.main.experiment_name`, `cfg.main.audio_dataset` and
            `cfg.openai_parameters`.
    """
    openai_client = OpenAI()
    run = wandb.init(
        project=cfg.main.project_name,
        group=cfg.main.experiment_name,
        config=cfg.openai_parameters,
        job_type="train_llm",
    )
    # download artifact
    artifact = run.use_artifact(
        os.path.join("mpoliti08/lux-voice-processing", cfg.main.audio_dataset),
        type="audio",
    )
    artifact_dir = artifact.download()

    table = wandb.Table(columns=["audio_file", "transcript"])
    for filename in os.listdir(artifact_dir):
        file_path = os.path.join(artifact_dir, filename)
        # Fix: the file handle was previously opened and never closed
        # (one leaked handle per artifact file); `with` closes it.
        with open(file_path, "rb") as audio:
            transcription_text = speech_to_text(
                audio=audio,
                openai_client=openai_client,
                configuration=cfg.openai_parameters,
            )
        audio_file = wandb.Audio(file_path)
        table.add_data(audio_file, transcription_text)

    run.log({"Table": table})
    run.finish()
if __name__ == "__main__":
    # Ad-hoc local check: transcribe one sample recording and print the result.
    openai_client = OpenAI()
    audio_path = "data/audio_recordings/0.wav"
    configuration = {
        "language": "it",
        "model": "whisper-1",
        "response_format": "text",
        "temperature": 0.2,
    }
    # Fix: reuse `audio_path` (the literal was duplicated here) and close
    # the file handle via `with` (it was previously left open).
    with open(audio_path, "rb") as audio:
        res = speech_to_text(
            audio=audio, openai_client=openai_client, configuration=configuration
        )
    print(res)
    # speech_to_text_on_wandb()