Mohssinibra committed on
Commit
3b0e26c
·
verified ·
1 Parent(s): a19085f

sentimentAnalysis

Browse files
Files changed (1) hide show
  1. app.py +31 -4
app.py CHANGED
@@ -32,6 +32,14 @@ bert_model_name = "bert-base-uncased"
32
  bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
33
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
34
 
 
 
 
 
 
 
 
 
35
  # Libellés en Darija (Arabe et Latin)
36
  darija_topic_labels = [
37
  "مشكيل ف الشبكة (Mochkil f réseau)", # Problème de réseau
@@ -135,10 +143,13 @@ def transcribe_audio(audio):
135
  english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
136
  #english_keyword_topic = classify_topic_by_keywords(translation )
137
 
138
- return transcription, translation, darija_topic, english_topic, darija_keyword_topic, english_keyword_topic
 
 
 
139
 
140
  except Exception as e:
141
- return f"Error processing audio: {str(e)}", "", "", "", "", ""
142
 
143
  def translate_text(text):
144
  """Translate Arabic text to English."""
@@ -156,6 +167,22 @@ def classify_topic(text, tokenizer, model, topic_labels):
156
 
157
  return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # 🔹 Gradio Interface
160
  with gr.Blocks() as demo:
161
  gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
@@ -169,11 +196,11 @@ with gr.Blocks() as demo:
169
  english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
170
  darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
171
  english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
172
-
173
  submit_button.click(transcribe_audio,
174
  inputs=[audio_input],
175
  outputs=[transcription_output, translation_output,
176
  darija_topic_output, english_topic_output,
177
- darija_keyword_topic_output, english_keyword_topic_output])
178
 
179
  demo.launch()
 
32
  bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
33
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
34
 
35
+ # Charger le modèle et le tokenizer Darija
36
+ sentiment_model_name = "BenhamdaneNawfal/sentiment-analysis-darija"
37
+ sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
38
+ sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name).to("cuda" if torch.cuda.is_available() else "cpu")
39
+
40
+ # Labels du modèle (à modifier selon le modèle utilisé)
41
+ sentiment_labels = ["Négatif", "Neutre", "Positif"]
42
+
43
  # Libellés en Darija (Arabe et Latin)
44
  darija_topic_labels = [
45
  "مشكيل ف الشبكة (Mochkil f réseau)", # Problème de réseau
 
143
  english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
144
  #english_keyword_topic = classify_topic_by_keywords(translation )
145
 
146
+ # l'analyse de sentiment
147
+ sentiment = analyze_sentiment(transcription)
148
+
149
+ return transcription, translation, darija_topic, english_topic, darija_keyword_topic, english_keyword_topic,sentiment
150
 
151
  except Exception as e:
152
+ return f"Error processing audio: {str(e)}", "", "", "", "", "", ""
153
 
154
  def translate_text(text):
155
  """Translate Arabic text to English."""
 
167
 
168
  return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
169
 
170
def analyze_sentiment(text):
    """Classify the sentiment of a Darija text.

    Args:
        text: Darija text to classify (the app passes the audio
            transcription here).

    Returns:
        The entry of ``sentiment_labels`` selected by the highest logit of
        ``sentiment_model``, or ``"Inconnu"`` when the text is empty/blank
        or the predicted index has no matching label.
    """
    # Nothing to classify: scoring an input made only of special tokens
    # would yield an arbitrary label, so report "unknown" instead.
    if text is None or (isinstance(text, str) and not text.strip()):
        return "Inconnu"

    # Follow the device the model actually lives on rather than re-deriving
    # it from CUDA availability, so inputs and weights can never diverge.
    device = sentiment_model.device

    # Tokenize the text (truncated to the 512-token limit used by the app).
    inputs = sentiment_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits
    predicted_class = torch.argmax(logits, dim=1).item()

    # Map the class index to its label; guard against a model that emits
    # more classes than there are labels configured.
    if predicted_class < len(sentiment_labels):
        return sentiment_labels[predicted_class]
    return "Inconnu"
184
+
185
+
186
  # 🔹 Gradio Interface
187
  with gr.Blocks() as demo:
188
  gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
 
196
  english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
197
  darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
198
  english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
199
+ sentiment_output = gr.Textbox(label="Sentiment (Darija)")
200
  submit_button.click(transcribe_audio,
201
  inputs=[audio_input],
202
  outputs=[transcription_output, translation_output,
203
  darija_topic_output, english_topic_output,
204
+ darija_keyword_topic_output, english_keyword_topic_output, sentiment_output])
205
 
206
  demo.launch()