Spaces:

Mohssinibra
/

STTDARIJAAPI

Sleeping

App Files Files Community

Mohssinibra committed on Feb 10

Commit

3b0e26c

verified ·

1 Parent(s): a19085f

sentimentAnalysis

Browse files

Files changed (1) hide show

app.py +31 -4

app.py CHANGED Viewed

@@ -32,6 +32,14 @@ bert_model_name = "bert-base-uncased"
 bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
 bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
 # Libellés en Darija (Arabe et Latin)
 darija_topic_labels = [
     "مشكيل ف الشبكة (Mochkil f réseau)",        # Problème de réseau
@@ -135,10 +143,13 @@ def transcribe_audio(audio):
         english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
         #english_keyword_topic = classify_topic_by_keywords(translation )
-        return transcription, translation, darija_topic, english_topic, darija_keyword_topic, english_keyword_topic
     except Exception as e:
-        return f"Error processing audio: {str(e)}", "", "", "", "", ""
 def translate_text(text):
     """Translate Arabic text to English."""
@@ -156,6 +167,22 @@ def classify_topic(text, tokenizer, model, topic_labels):
     return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
 # 🔹 Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
@@ -169,11 +196,11 @@ with gr.Blocks() as demo:
     english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
     darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
     english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
     submit_button.click(transcribe_audio,
                         inputs=[audio_input],
                         outputs=[transcription_output, translation_output,
                                  darija_topic_output, english_topic_output,
-                                 darija_keyword_topic_output, english_keyword_topic_output])
 demo.launch()

 bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
 bert_model = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=3).to(device)
+# Load the Darija sentiment-analysis model and its tokenizer
+sentiment_model_name = "BenhamdaneNawfal/sentiment-analysis-darija"
+sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
+sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name).to("cuda" if torch.cuda.is_available() else "cpu")
+# Model labels (adjust to match the model in use)
+# NOTE(review): the order is assumed to match the model's class indices — confirm against the model config
+sentiment_labels = ["Négatif", "Neutre", "Positif"]
 # Libellés en Darija (Arabe et Latin)
 darija_topic_labels = [
     "مشكيل ف الشبكة (Mochkil f réseau)",        # Problème de réseau
         english_keyword_topic = classify_topic_by_keywords(translation,language='en' )
         #english_keyword_topic = classify_topic_by_keywords(translation )
+        #  l'analyse de sentiment
+        sentiment = analyze_sentiment(transcription)
+        return transcription, translation, darija_topic, english_topic, darija_keyword_topic, english_keyword_topic,sentiment
     except Exception as e:
+        return f"Error processing audio: {str(e)}", "", "", "", "", "", ""
 def translate_text(text):
     """Translate Arabic text to English."""
     return topic_labels[predicted_class] if predicted_class < len(topic_labels) else "Other"
+def analyze_sentiment(text):
+    """Classifie le sentiment du texte en Darija."""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Tokenizer le texte
+    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
+    # Prédiction
+    with torch.no_grad():
+        outputs = sentiment_model(**inputs)
+        predicted_class = torch.argmax(outputs.logits, dim=1).item()
+    # Retourner la classe correspondante
+    return sentiment_labels[predicted_class] if predicted_class < len(sentiment_labels) else "Inconnu"
 # 🔹 Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# 🎙️ Speech-to-Text, Translation & Topic Classification")
     english_topic_output = gr.Textbox(label="English Topic Classification (BERT)")
     darija_keyword_topic_output = gr.Textbox(label="Darija Topic Classification (Keywords)")
     english_keyword_topic_output = gr.Textbox(label="English Topic Classification (Keywords)")
+    sentiment_output = gr.Textbox(label="Sentiment (Darija)")
     submit_button.click(transcribe_audio,
                         inputs=[audio_input],
                         outputs=[transcription_output, translation_output,
                                  darija_topic_output, english_topic_output,
+                                 darija_keyword_topic_output, english_keyword_topic_output, sentiment_output])
 demo.launch()