AA_Final2

Sleeping

App Files Files Community

ahm14 committed on Jan 29

Commit

c536f05

verified ·

1 Parent(s): 7ddff49

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -77

app.py CHANGED Viewed

@@ -3,99 +3,45 @@ import re
 from langdetect import detect
 from transformers import pipeline
 import nltk
-from nltk.tokenize import word_tokenize
-from nltk.stem import WordNetLemmatizer
 from docx import Document
 import io
 # Download required NLTK resources
 nltk.download('punkt')
-nltk.download('wordnet')
-# Initialize Lemmatizer
-lemmatizer = WordNetLemmatizer()
-# Cache model to avoid reloading on every function call
-@st.cache_resource
-def load_pipeline():
-    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-tone_model = load_pipeline()
-frame_model = load_pipeline()
 # Updated tone categories
-tone_categories = {
-    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis"],
-    "Critical": ["corrupt", "oppression", "failure", "repression", "unjust"],
-    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
-    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
-    "Informative": ["announcement", "event", "scheduled", "update", "details"],
-    "Positive": ["progress", "unity", "hope", "victory", "solidarity"],
-    "Urgent": ["urgent", "violence", "disappearances", "forced", "killing", "concern", "crisis"],
-    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust"],
-    "Negative": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
-    "Empowering": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
-    "Neutral": ["announcement", "event", "scheduled", "update", "details", "protest on"],
-    "Hopeful": ["progress", "unity", "hope", "victory", "together", "solidarity"]
-}
-# Updated frame categories (Limited to 4 selections)
-frame_categories = {
-    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
-    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
-    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
-    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
-    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
-    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
-    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
-    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
-    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
-    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
-    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
-    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
-    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
-    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
-    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
-}
-# Language detection
 def detect_language(text):
     """Return langdetect's ``detect`` result for *text*, or "unknown" on failure.

     The return value on success is whatever ``detect`` returns
     (presumably a language code string -- confirm against langdetect docs).
     """
     try:
         return detect(text)
     # Any Exception raised by detect() is swallowed here and reported as
     # the literal "unknown"; callers never see the underlying error.
     except Exception:
         return "unknown"
-# NLP-based keyword matching with lemmatization
-def contains_keywords(text, keywords):
-    words = word_tokenize(text.lower())
-    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
-    return any(keyword in lemmatized_words for keyword in keywords)
-# Analyze tone based on predefined categories
 def analyze_tone(text):
-    detected_tones = set()
-    for category, keywords in tone_categories.items():
-        if contains_keywords(text, keywords):
-            detected_tones.add(category)
-    if not detected_tones:
-        model_result = tone_model(text, candidate_labels=list(tone_categories.keys()))
-        detected_tones.update(model_result["labels"][:2])
-    return list(detected_tones)
-# Extract frames based on predefined categories (Limit to 4)
 def extract_frames(text):
-    detected_frames = set()
-    for category, keywords in frame_categories.items():
-        if contains_keywords(text, keywords):
-            detected_frames.add(category)
-    if not detected_frames:
-        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
-        detected_frames.update(model_result["labels"][:4])
-    return list(detected_frames)[:4]  # Ensure no more than 4 frames are selected
 # Extract hashtags
 def extract_hashtags(text):
@@ -141,7 +87,7 @@ def generate_docx(output_data):
     return doc_io
-# Streamlit app
 st.title('AI-Powered Activism Message Analyzer')
 st.write("Enter the text to analyze or upload a DOCX file containing captions:")

 from langdetect import detect
 from transformers import pipeline
 import nltk
 from docx import Document
 import io
 # Download required NLTK resources
 nltk.download('punkt')
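+# Load AI models at module level.
+# NOTE(review): the @st.cache_resource wrapper was removed in this commit; Streamlit re-executes the script on each rerun, so these pipelines are presumably rebuilt every time -- confirm.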
+tone_model = pipeline("zero-shot-classification", model="facebook/roberta-large-mnli")
+frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 # Updated tone categories
+tone_categories = [
+    "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
+    "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
+]
+# Updated frame categories
+frame_categories = [
+    "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
+    "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
+    "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
+]
+# Detect language
 def detect_language(text):
     """Return langdetect's ``detect`` result for *text*, or "unknown" on failure.

     The return value on success is whatever ``detect`` returns
     (presumably a language code string -- confirm against langdetect docs).
     """
     try:
         return detect(text)
     # Any Exception raised by detect() is swallowed here and reported as
     # the literal "unknown"; callers never see the underlying error.
     except Exception:
         return "unknown"
+# Analyze tone using RoBERTa model
 def analyze_tone(text):
+    model_result = tone_model(text, candidate_labels=tone_categories)
+    return model_result["labels"][:2]  # Top 2 tone labels
+# Extract frames using BART model
 def extract_frames(text):
+    model_result = frame_model(text, candidate_labels=frame_categories)
+    return model_result["labels"][:2]  # Top 2 frame labels
 # Extract hashtags
 def extract_hashtags(text):
     return doc_io
+# Streamlit app UI
 st.title('AI-Powered Activism Message Analyzer')
 st.write("Enter the text to analyze or upload a DOCX file containing captions:")