# fix dockerfile (commit 9da994b)
import os
from openai import OpenAI
import logging
import hydra
from dotenv import load_dotenv
import wandb
from omegaconf import DictConfig
# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): `api_key` is not used anywhere in this file — the OpenAI()
# client reads OPENAI_API_KEY from the environment itself; verify it's needed.
api_key = os.getenv("OPENAI_API_KEY")
def speech_to_text(audio: bytes, openai_client: OpenAI, configuration: dict) -> str:
    """Transcribe audio content to text using the OpenAI transcription API.

    Args:
        audio (bytes): audio content (or an open binary file handle) containing
            the query
        openai_client (OpenAI): client for the OpenAI connection
        configuration (dict): must provide the "model", "language",
            "response_format" and "temperature" keys

    Returns:
        str: transcription text on success, or an "error ..." string on failure
    """
    try:
        transcription = openai_client.audio.transcriptions.create(
            model=configuration["model"],
            file=audio,
            language=configuration["language"],
            response_format=configuration["response_format"],
            temperature=configuration["temperature"],
        )
        logging.info("Success: audio converted into text!")
        logging.info(f"Audio transcription: {transcription}")
        return transcription
    except FileNotFoundError as e:
        # Fix: this branch used to hit a bare `pass` and fall off the end of
        # the function, silently returning None. Now it logs and returns an
        # error string, consistent with the generic handler below.
        logging.error(f"Error: not found - {str(e)}")
        return f"error {str(e)}"
    except Exception as e:
        logging.error(f"Error: OpenAI API request failed - {str(e)}")
        return f"error {str(e)}"
@hydra.main(config_path="../../conf", config_name="speech_to_text.yaml")
def speech_to_text_on_wandb(cfg: DictConfig):
    """Transcribe every audio file in a W&B artifact and log the results.

    Downloads the configured audio dataset artifact, runs `speech_to_text`
    on each file in it, and logs an (audio, transcript) table to the run.

    Args:
        cfg (DictConfig): hydra config; reads `cfg.main.project_name`,
            `cfg.main.experiment_name`, `cfg.main.audio_dataset` and
            `cfg.openai_parameters`.
    """
    openai_client = OpenAI()
    run = wandb.init(
        project=cfg.main.project_name,
        group=cfg.main.experiment_name,
        config=cfg.openai_parameters,
        job_type="train_llm",
    )
    # download artifact
    artifact = run.use_artifact(
        os.path.join("mpoliti08/lux-voice-processing", cfg.main.audio_dataset),
        type="audio",
    )
    artifact_dir = artifact.download()

    table = wandb.Table(columns=["audio_file", "transcript"])
    for filename in os.listdir(artifact_dir):
        file_path = os.path.join(artifact_dir, filename)
        # Fix: the file handle was previously opened and never closed
        # (one leaked handle per artifact file); `with` closes it.
        with open(file_path, "rb") as audio:
            transcription_text = speech_to_text(
                audio=audio,
                openai_client=openai_client,
                configuration=cfg.openai_parameters,
            )
        audio_file = wandb.Audio(file_path)
        table.add_data(audio_file, transcription_text)

    run.log({"Table": table})
    run.finish()
if __name__ == "__main__":
    # Ad-hoc local check: transcribe one sample recording and print the result.
    openai_client = OpenAI()
    audio_path = "data/audio_recordings/0.wav"
    configuration = {
        "language": "it",
        "model": "whisper-1",
        "response_format": "text",
        "temperature": 0.2,
    }
    # Fix: reuse `audio_path` (the literal was duplicated here) and close
    # the file handle via `with` (it was previously left open).
    with open(audio_path, "rb") as audio:
        res = speech_to_text(
            audio=audio, openai_client=openai_client, configuration=configuration
        )
    print(res)
    # speech_to_text_on_wandb()