import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline
import librosa
import soundfile as sf


class EmotionRecognizer:
    def __init__(self):
        # Speech-emotion classifier fine-tuned on English speech; runs on GPU if available
        self.classifier = pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            device=0 if torch.cuda.is_available() else -1,
        )
        self.target_sr = 16000  # Sample rate expected by the model
        self.max_duration = 10  # Max audio duration in seconds

    def process_audio(self, audio_path):
        try:
            # Load the audio file with soundfile (works reliably in Hugging Face Spaces)
            audio, orig_sr = sf.read(audio_path)

            # Convert stereo to mono if needed
            if audio.ndim > 1:
                audio = np.mean(audio, axis=1)

            # Resample to the model's sample rate if necessary
            if orig_sr != self.target_sr:
                audio = librosa.resample(
                    y=audio.astype(np.float32),
                    orig_sr=orig_sr,
                    target_sr=self.target_sr,
                )
            else:
                audio = audio.astype(np.float32)

            # Peak-normalize the waveform
            audio = librosa.util.normalize(audio)

            # Trim or zero-pad to exactly max_duration seconds
            max_samples = self.max_duration * self.target_sr
            if len(audio) > max_samples:
                audio = audio[:max_samples]
            else:
                audio = np.pad(audio, (0, max_samples - len(audio)))

            # Run the classifier on the preprocessed waveform
            results = self.classifier(
                {"array": audio, "sampling_rate": self.target_sr}
            )

            # Format output: the pipeline returns a list of {"label", "score"} dicts
            labels = [res["label"] for res in results]
            scores = [res["score"] * 100 for res in results]

            text_output = "\n".join(
                f"{label}: {score:.2f}%" for label, score in zip(labels, scores)
            )

            # gr.BarPlot expects a pandas DataFrame rather than a plain dict
            plot_data = pd.DataFrame({"labels": labels, "values": scores})

            return text_output, plot_data

        except Exception as e:
            error_msg = f"Error processing audio: {e}"
            print(error_msg)
            return error_msg, None


def create_interface():
    recognizer = EmotionRecognizer()

    with gr.Blocks(title="Audio Emotion Recognition") as interface:
        gr.Markdown("# 🎙️ Audio Emotion Recognition")
        gr.Markdown("Record or upload audio (English speech, 3-10 seconds)")

        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Input Audio",
                    waveform_options={"waveform_progress_color": "#FF0066"},
                )
                submit_btn = gr.Button("Analyze", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(
                    label="Emotion Analysis Results",
                    interactive=False,
                )
                plot_output = gr.BarPlot(
                    label="Confidence Scores",
                    x="labels",
                    y="values",
                    color="labels",
                    height=300,
                )

        submit_btn.click(
            fn=recognizer.process_audio,
            inputs=audio_input,
            outputs=[text_output, plot_output],
        )

        gr.Examples(
            examples=[
                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_angry.wav",
                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_happy.wav",
            ],
            inputs=audio_input,
            outputs=[text_output, plot_output],
            fn=recognizer.process_audio,
            cache_examples=True,
        )

    return interface


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()