Boltz79 committed on
Commit
f52a928
·
verified ·
1 Parent(s): 9460d34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -39
app.py CHANGED
@@ -1,53 +1,116 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
- # Load Whisper for speech-to-text
5
- whisper = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
6
-
7
- # Load a sentiment analysis model
8
- sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Function to process audio and analyze tone
11
- def analyze_call(audio_file):
 
 
 
12
  try:
13
- # Step 1: Transcribe audio to text using Whisper
14
- transcription = whisper(audio_file)["text"]
 
 
15
 
16
- # Step 2: Analyze sentiment of the transcription
17
- sentiment_result = sentiment_analyzer(transcription)[0]
18
 
19
- # Prepare the output
20
- output = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "transcription": transcription,
22
- "sentiment": sentiment_result["label"],
23
- "confidence": round(sentiment_result["score"], 4)
24
  }
25
- return output
26
  except Exception as e:
27
- return {"error": str(e)}
 
 
 
 
28
 
29
- # Gradio Interface
30
- def gradio_interface(audio):
31
- if audio is None:
32
- return "Please record or upload an audio file."
33
- result = analyze_call(audio)
34
- if "error" in result:
35
- return f"Error: {result['error']}"
36
- return (
37
- f"**Transcription:** {result['transcription']}\n\n"
38
- f"**Sentiment:** {result['sentiment']}\n\n"
39
- f"**Confidence:** {result['confidence']}"
 
 
 
 
 
 
 
 
 
 
 
40
  )
41
 
42
- # Create Gradio app
43
- interface = gr.Interface(
44
- fn=gradio_interface,
45
- inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
46
- outputs=gr.Textbox(label="Analysis Result", lines=5),
47
- title="Real-Time Call Analysis",
48
- description="Record or upload audio to analyze tone and sentiment in real time.",
49
- live=False # Set to False to avoid constant re-runs
50
- )
51
 
52
- # Launch the app
53
- interface.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import numpy as np
4
 
5
# Pipelines built by the first successful call; later calls reuse them so the
# models are not reloaded for every request.
_LOADED_MODELS = None


def create_speech_analyzer():
    """Return the speech-to-text and sentiment-analysis pipelines.

    The pipelines are created on the first successful call and cached at
    module level; subsequent calls return the same objects.

    Returns:
        A ``(transcriber, sentiment_model)`` tuple of transformers pipelines.

    Raises:
        RuntimeError: If either pipeline cannot be created. The underlying
            exception is attached as ``__cause__``.
    """
    global _LOADED_MODELS
    if _LOADED_MODELS is not None:
        return _LOADED_MODELS

    try:
        # Whisper (small checkpoint) via the transformers ASR pipeline;
        # the small model is used for faster processing.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            max_new_tokens=128,
        )

        # RoBERTa-based sentiment classifier.
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(f"Error loading models: {str(e)}") from e

    # Cache only after both pipelines loaded, so a failed attempt is retried.
    _LOADED_MODELS = (transcriber, sentiment_model)
    return _LOADED_MODELS
25
 
26
# Readable names for the labels the sentiment pipeline may return. Generic
# LABEL_n ids and lowercase label names are both accepted; any other label is
# shown unchanged.
_SENTIMENT_LABELS = {
    'LABEL_0': 'Negative',
    'LABEL_1': 'Neutral',
    'LABEL_2': 'Positive',
    'negative': 'Negative',
    'neutral': 'Neutral',
    'positive': 'Positive',
}

# Transcriptions with more words than this also get a per-chunk analysis.
_LONG_TEXT_WORDS = 50


def _error_result(message):
    """Build the result dict returned when the analysis cannot be completed."""
    return {
        "transcription": f"Error in processing: {message}",
        "sentiment": "ERROR",
        "analysis": "Analysis failed",
    }


def analyze_speech(audio_file):
    """Transcribe an audio file and analyze the sentiment of the transcript.

    Args:
        audio_file: Path of the audio file to analyze, or ``None`` when no
            audio was supplied.

    Returns:
        A dict with the keys ``"transcription"``, ``"sentiment"`` and
        ``"analysis"``. Failures never raise: they are reported through the
        same dict with ``"sentiment"`` set to ``"ERROR"``.
    """
    # Reject missing input before paying for model loading.
    if audio_file is None:
        return _error_result("no audio was provided")

    try:
        transcriber, sentiment_model = create_speech_analyzer()

        transcription = transcriber(audio_file)["text"]

        # Nothing to classify when no speech was recognized.
        if not transcription.strip():
            return {
                "transcription": transcription,
                "sentiment": "N/A",
                "analysis": "No speech detected",
            }

        sentiment_result = sentiment_model(transcription)[0]
        raw_label = sentiment_result['label']
        sentiment = _SENTIMENT_LABELS.get(raw_label, raw_label)
        confidence = sentiment_result['score']

        if len(transcription.split()) > _LONG_TEXT_WORDS:
            # Score each non-empty '.'-separated chunk on its own.
            chunks = [c for c in transcription.split('.') if c.strip()]
            chunk_scores = [sentiment_model(c)[0]['score'] for c in chunks]

            # NOTE(review): this is the spread of the per-chunk confidence
            # scores regardless of which label each chunk received.
            sentiment_variation = np.std(chunk_scores)

            detailed_analysis = (
                f"Overall Sentiment: {sentiment} ({confidence:.2%})\n"
                f"Sentiment Stability: {1 - sentiment_variation:.2%}\n"
                f"Text chunks analyzed: {len(chunk_scores)}"
            )
        else:
            detailed_analysis = f"Sentiment: {sentiment} ({confidence:.2%})"

        return {
            "transcription": transcription,
            "sentiment": sentiment,
            "analysis": detailed_analysis,
        }

    except Exception as e:
        # UI boundary: report the failure in the result instead of raising.
        return _error_result(str(e))
81
 
82
def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        A ``gr.Interface`` with one audio input (microphone or upload, passed
        to the handler as a file path) and three text outputs: transcription,
        sentiment and detailed analysis.
    """

    def run_analysis(audio_file):
        # analyze_speech returns a dict keyed by field name, but the three
        # output components are filled positionally, so unpack the dict in
        # the same order as the `outputs` list below.
        result = analyze_speech(audio_file)
        return (
            result["transcription"],
            result["sentiment"],
            result["analysis"],
        )

    return gr.Interface(
        fn=run_analysis,
        inputs=gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Upload or Record Audio"
        ),
        outputs=[
            gr.Textbox(label="Transcription", lines=3),
            gr.Textbox(label="Sentiment"),
            gr.Textbox(label="Detailed Analysis", lines=3)
        ],
        title="Advanced Speech Sentiment Analyzer",
        description="""
        This tool performs advanced sentiment analysis on speech using RoBERTa.
        It provides detailed sentiment analysis for longer texts and handles
        both audio uploads and microphone recordings.
        """,
        theme=gr.themes.Soft(),
        examples=[]
    )
105
 
106
def main():
    """Build the Gradio interface and launch it."""
    app = create_interface()
    # Launch settings kept together so they are easy to review.
    launch_options = {
        "share": True,
        "debug": True,
        "server_name": "0.0.0.0",
    }
    app.launch(**launch_options)


if __name__ == "__main__":
    main()