Update app.py
app.py
CHANGED
@@ -5,7 +5,7 @@ from pydub import AudioSegment
 import tempfile
 import os
 import io
-from …
+from transformers import pipeline
 import numpy as np
 import wave
 import matplotlib.pyplot as plt
@@ -55,11 +55,17 @@ def transcribe_audio(audio_file):
     except sr.RequestError:
         return "Could not request results from Google Speech Recognition service."
 
-# Function …
-def …
-…
-…
-…
+# Function to perform emotion detection using Hugging Face transformers
+def detect_emotion(text):
+    # Load emotion detection pipeline
+    emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+
+    # Get the emotion predictions
+    result = emotion_pipeline(text)
+
+    # Extract the emotion with the highest score
+    emotions = {emotion['label']: emotion['score'] for emotion in result[0]}
+    return emotions
 
 # Function to visualize audio waveform
 def plot_waveform(audio_file):
@@ -73,8 +79,8 @@ def plot_waveform(audio_file):
     st.pyplot(plt)
 
 # Streamlit app layout
-st.title("Video and Audio to Text Transcription with …
-st.write("Upload a video or audio file to convert it to transcription, …
+st.title("Video and Audio to Text Transcription with Emotion Detection and Visualization")
+st.write("Upload a video or audio file to convert it to transcription, detect emotions, and visualize the audio waveform.")
 
 # Create tabs to separate video and audio uploads
 tab = st.selectbox("Select the type of file to upload", ["Video", "Audio"])
@@ -105,9 +111,9 @@ if tab == "Video":
         # Show the transcription
         st.text_area("Transcription", transcription, height=300)
 
-        # …
-        …
-        st.write(f"…
+        # Emotion detection
+        emotions = detect_emotion(transcription)
+        st.write(f"Detected Emotions: {emotions}")
 
         # Plot the audio waveform
         st.subheader("Audio Waveform Visualization")
@@ -173,9 +179,9 @@ elif tab == "Audio":
         # Show the transcription
         st.text_area("Transcription", transcription, height=300)
 
-        # …
-        …
-        st.write(f"…
+        # Emotion detection
+        emotions = detect_emotion(transcription)
+        st.write(f"Detected Emotions: {emotions}")
 
         # Plot the audio waveform
         st.subheader("Audio Waveform Visualization")
@@ -212,4 +218,4 @@ elif tab == "Audio":
             data=st.session_state.wav_audio_file_audio,
             file_name="converted_audio_audio.wav",
             mime="audio/wav"
-        )
+        )
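For context, here is a minimal standalone sketch of the emotion-detection helper this commit introduces. It is an illustration, not part of the Space itself: it assumes the transformers package with a PyTorch backend is installed and that the j-hartmann/emotion-english-distilroberta-base model can be downloaded from the Hub. return_all_scores=True mirrors the commit; newer transformers releases express the same option as top_k=None.

# Standalone sketch of the detect_emotion() logic added above
# (assumes: pip install transformers torch; model is fetched from the Hugging Face Hub)
from transformers import pipeline

# Building the pipeline is slow; inside the Streamlit app it could be cached
# (for example with st.cache_resource) instead of being rebuilt on every call.
emotion_pipeline = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    return_all_scores=True,  # newer transformers versions accept top_k=None instead
)

sample_text = "I could not believe how clear the recording turned out!"
result = emotion_pipeline(sample_text)

# result[0] is a list of {'label': ..., 'score': ...} dicts, one per emotion class
emotions = {item["label"]: item["score"] for item in result[0]}
print(max(emotions, key=emotions.get), emotions)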