import tempfile
import os

import cv2
import librosa
import noisereduce as nr
import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from deepface import DeepFace
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Point pydub at the system ffmpeg binary so it can decode compressed audio/video.
AudioSegment.converter = "/usr/bin/ffmpeg"
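# Sanity check (an addition, not in the original script): warn early if the
# hard-coded ffmpeg path is missing, since pydub and librosa's audioread
# fallback both rely on it for non-WAV inputs.
if not os.path.exists(AudioSegment.converter):
    st.warning("⚠️ ffmpeg not found at /usr/bin/ffmpeg; audio decoding may fail.")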
st.title("🤖 AI Child Behavior Assessment")
st.markdown(
    """
### How to Use:
1️⃣ Choose an **analysis type** below.
2️⃣ Upload the required file(s).
3️⃣ Click the **Analyze** button to process the data.
"""
)
st.write("⏳ Loading AI Speech Model...")


@st.cache_resource
def load_speech_model():
    """Load the Wav2Vec2 processor and model once; Streamlit caches them across reruns."""
    wav2vec_processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-english")
    wav2vec_model = Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-english")
    return wav2vec_processor, wav2vec_model


try:
    processor, model = load_speech_model()
    st.success("✅ AI Speech Model Loaded!")
except Exception as e:
    st.error(f"❌ Error loading speech model: {e}")
    st.stop()  # The rest of the app needs the model, so halt here on failure.
def analyze_video(video_path):
    """Processes video frames with DeepFace and visualizes the detected emotions."""
    st.write("🔍 Analyzing Emotions in Video...")
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    emotions_detected = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Analyze every 10th frame to keep processing time manageable.
        if frame_count % 10 == 0:
            try:
                analysis = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)
                emotions_detected.append(analysis[0]['dominant_emotion'])
            except Exception as e:
                st.error(f"⚠️ DeepFace error: {e}")
        frame_count += 1

    cap.release()
    if emotions_detected:
        most_common_emotion = max(set(emotions_detected), key=emotions_detected.count)
        st.success(f"🧠 Most detected emotion: {most_common_emotion}")

        emotion_counts = pd.Series(emotions_detected).value_counts()
        emotion_df = pd.DataFrame({'Emotion': emotion_counts.index, 'Count': emotion_counts.values})
        fig = px.bar(emotion_df, x='Emotion', y='Count', title="Emotion Distribution in Video", color='Emotion')
        st.plotly_chart(fig)
    else:
        st.warning("⚠️ No emotions detected. Try a different video.")

    # Return the per-frame labels so the multimodal branch can reuse them.
    return emotions_detected
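# Alternative sampling sketch (an assumption, not part of the original): step by
# the video's reported FPS so roughly one frame per second is analyzed regardless
# of frame rate, instead of the fixed every-10th-frame rule above:
#
#     fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # fall back to 30 if unreported
#     if frame_count % fps == 0:
#         ...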
def transcribe_audio(audio_path):
    """Cleans up an audio file and transcribes it with the Wav2Vec2 model."""
    try:
        st.write("🔊 Processing Audio File...")
        # Load at 16 kHz, the sampling rate Wav2Vec2 expects; librosa falls back
        # to ffmpeg (via audioread) for compressed formats.
        speech, sample_rate = librosa.load(audio_path, sr=16000)

        # Light cleanup: noise reduction, silence trimming, and peak normalization.
        speech = nr.reduce_noise(y=speech, sr=sample_rate, prop_decrease=0.4)
        speech = librosa.effects.trim(speech)[0]
        speech = librosa.util.normalize(speech)

        st.write("🤖 Processing audio with AI model...")
        input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values

        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy CTC decoding: pick the most likely token at each time step.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]

        st.success(f"📝 Transcription (AI Model): {transcription}")

        word_count = pd.Series(transcription.split()).value_counts()
        word_df = pd.DataFrame({'Word': word_count.index, 'Count': word_count.values})
        fig = px.bar(word_df, x='Word', y='Count', title="Word Frequency in Transcription", color='Word')
        st.plotly_chart(fig)

        # Return the text so the multimodal branch can reuse it.
        return transcription
    except Exception as e:
        st.error(f"⚠️ Error in AI Speech Processing: {e}")
        return None
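# Helper sketch (an addition, not in the original script): the multimodal branch
# below derives a coarse speech emotion from the transcription; factoring the
# keyword matching out here keeps the word lists in one place.
def keyword_emotion(text):
    """Return a rough emotion label based on keywords in a transcription."""
    lowered = text.lower()
    if any(word in lowered for word in ["angry", "mad"]):
        return "Angry"
    if any(word in lowered for word in ["happy", "excited"]):
        return "Happy"
    if any(word in lowered for word in ["sad", "crying"]):
        return "Sad"
    return "Neutral"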
analysis_option = st.radio(
    "Select Analysis Type:",
    ["📹 Video Only (Facial Emotion)", "🎤 Audio Only (Speech Analysis)", "🎬 Video & Audio (Multimodal)"]
)
if analysis_option == "📹 Video Only (Facial Emotion)":
    st.header("📂 Upload a Video for Emotion Analysis")
    video_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])

    if video_file:
        # Persist the upload to a temp file so OpenCV can read it from disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            temp_video.write(video_file.read())
            video_path = temp_video.name
        st.success("🎞 Video uploaded successfully!")

        if st.button("Analyze Video"):
            analyze_video(video_path)
elif analysis_option == "🎤 Audio Only (Speech Analysis)":
    st.header("🎤 Upload an Audio File for Speech Analysis")
    audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])

    if audio_file:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_file.read())
            audio_path = temp_audio.name
        st.success("🎤 Audio uploaded successfully!")

        if st.button("Analyze Audio"):
            transcribe_audio(audio_path)
elif analysis_option == "🎬 Video & Audio (Multimodal)":
    st.header("🎥 Upload a **Single File** for Video & Audio Combined Analysis")
    multimodal_file = st.file_uploader("Upload a **video file with audio**", type=["mp4", "avi", "mov"])

    if multimodal_file:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
            temp_file.write(multimodal_file.read())
            multimodal_path = temp_file.name

        st.success("✅ Multimodal file uploaded successfully!")

        if st.button("Analyze Video & Audio Together"):
            def analyze_multimodal(multimodal_path):
                st.write("🔄 Extracting Video & Audio...")

                # Run the facial-emotion pipeline on the video frames.
                video_emotions = analyze_video(multimodal_path)

                # Extract the audio track to WAV with pydub/ffmpeg so librosa can
                # load it reliably, then transcribe it.
                audio_path = multimodal_path.replace(".mp4", ".wav")
                AudioSegment.from_file(multimodal_path).export(audio_path, format="wav")
                audio_transcription = transcribe_audio(audio_path)

                st.header("📊 Multimodal Analysis Results")
                if not video_emotions or not audio_transcription:
                    st.error("❌ Could not extract both Video & Audio insights.")
                    return

                # Coarse speech emotion from keywords (see keyword_emotion above).
                speech_emotion = keyword_emotion(audio_transcription)

                # Use the most frequent video emotion rather than the first frame's.
                dominant_video_emotion = max(set(video_emotions), key=video_emotions.count)

                # Simple visual comparison: the number of video emotion samples
                # against the single speech-derived label.
                fig = px.pie(
                    names=["Video Emotion", "Speech Emotion"],
                    values=[len(video_emotions), 1],
                    title=f"Comparison: Video ({dominant_video_emotion}) vs. Speech ({speech_emotion})"
                )
                st.plotly_chart(fig)

            analyze_multimodal(multimodal_path)
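# Launch (assuming this script is saved as app.py):
#   streamlit run app.py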