Spaces:
Sleeping
Sleeping
sentimentAnalysis
Browse files
app.py
CHANGED
@@ -32,6 +32,14 @@ bert_model_name = "bert-base-uncased"
|
|
32 |
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
|
33 |
bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
# Libellés en Darija (Arabe et Latin)
|
36 |
darija_topic_labels = [
|
37 |
"مشكيل ف الشبكة (Mochkil f réseau)", # Problème de réseau
|
@@ -135,10 +143,13 @@ def transcribe_audio(audio):
|
|
135 |
english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
|
136 |
#english_keyword_topic = classify_topic_by_keywords(translation )
|
137 |
|
138 |
-
|
|
|
|
|
|
|
139 |
|
140 |
except Exception as e:
|
141 |
-
return f"Error processing audio: {str(e)}", "", "", "", "", ""
|
142 |
|
143 |
def translate_text(text):
|
144 |
"""Translate Arabic text to English."""
|
@@ -156,6 +167,22 @@ def classify_topic(text, tokenizer, model, topic_labels):
|
|
156 |
|
157 |
return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
# 🔹 Gradio Interface
|
160 |
with gr.Blocks() as demo:
|
161 |
gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
|
@@ -169,11 +196,11 @@ with gr.Blocks() as demo:
|
|
169 |
english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
|
170 |
darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
|
171 |
english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
|
172 |
-
|
173 |
submit_button.click(transcribe_audio,
|
174 |
inputs=[audio_input],
|
175 |
outputs=[transcription_output, translation_output,
|
176 |
darija_topic_output, english_topic_output,
|
177 |
-
darija_keyword_topic_output, english_keyword_topic_output])
|
178 |
|
179 |
demo.launch()
|
|
|
32 |
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
|
33 |
bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
|
34 |
|
35 |
+
# Charger le modèle et le tokenizer Darija
|
36 |
+
sentiment_model_name = "BenhamdaneNawfal/sentiment-analysis-darija"
|
37 |
+
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
|
38 |
+
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name).to("cuda" if torch.cuda.is_available() else "cpu")
|
39 |
+
|
40 |
+
# Labels du modèle (à modifier selon le modèle utilisé)
|
41 |
+
sentiment_labels = ["Négatif", "Neutre", "Positif"]
|
42 |
+
|
43 |
# Libellés en Darija (Arabe et Latin)
|
44 |
darija_topic_labels = [
|
45 |
"مشكيل ف الشبكة (Mochkil f réseau)", # Problème de réseau
|
|
|
143 |
english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
|
144 |
#english_keyword_topic = classify_topic_by_keywords(translation )
|
145 |
|
146 |
+
# l'analyse de sentiment
|
147 |
+
sentiment = analyze_sentiment(transcription)
|
148 |
+
|
149 |
+
return transcription, translation, darija_topic, english_topic, darija_keyword_topic, english_keyword_topic,sentiment
|
150 |
|
151 |
except Exception as e:
|
152 |
+
return f"Error processing audio: {str(e)}", "", "", "", "", "", ""
|
153 |
|
154 |
def translate_text(text):
|
155 |
"""Translate Arabic text to English."""
|
|
|
167 |
|
168 |
return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
|
169 |
|
170 |
+
def analyze_sentiment(text):
    """Classify the sentiment of a Darija text.

    Args:
        text: The text to classify. The caller in this file passes the
            audio transcription.

    Returns:
        The entry of ``sentiment_labels`` at the predicted class index, or
        ``"Inconnu"`` when the input is ``None``/blank or the predicted
        index falls outside ``sentiment_labels``.
    """
    # Blank input carries no sentiment; without this guard the model would
    # still emit an arbitrary class for the special tokens alone.
    if text is None or (isinstance(text, str) and not text.strip()):
        return "Inconnu"

    # Same device expression as the one used when the model was loaded, so
    # the input tensors land where the model lives.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Tokenize the text (truncated to at most 512 tokens)
    inputs = sentiment_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(device)

    # Inference only: no gradients needed
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
        predicted_class = torch.argmax(outputs.logits, dim=1).item()

    # Map the class index to its label; fall back when the model has more
    # classes than sentiment_labels lists.
    if predicted_class < len(sentiment_labels):
        return sentiment_labels[predicted_class]
    return "Inconnu"
|
184 |
+
|
185 |
+
|
186 |
# 🔹 Gradio Interface
|
187 |
with gr.Blocks() as demo:
|
188 |
gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
|
|
|
196 |
english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
|
197 |
darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
|
198 |
english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
|
199 |
+
sentiment_output = gr.Textbox(label="Sentiment (Darija)")
|
200 |
submit_button.click(transcribe_audio,
|
201 |
inputs=[audio_input],
|
202 |
outputs=[transcription_output, translation_output,
|
203 |
darija_topic_output, english_topic_output,
|
204 |
+
darija_keyword_topic_output, english_keyword_topic_output, sentiment_output])
|
205 |
|
206 |
demo.launch()
|