ahm14 committed on
Commit
c536f05
·
verified ·
1 Parent(s): 7ddff49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -77
app.py CHANGED
@@ -3,99 +3,45 @@ import re
3
  from langdetect import detect
4
  from transformers import pipeline
5
  import nltk
6
- from nltk.tokenize import word_tokenize
7
- from nltk.stem import WordNetLemmatizer
8
  from docx import Document
9
  import io
10
 
11
  # Download required NLTK resources
12
  nltk.download('punkt')
13
- nltk.download('wordnet')
14
 
15
- # Initialize Lemmatizer
16
- lemmatizer = WordNetLemmatizer()
17
-
18
- # Cache model to avoid reloading on every function call
19
- @st.cache_resource
20
- def load_pipeline():
21
- return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
22
-
23
- tone_model = load_pipeline()
24
- frame_model = load_pipeline()
25
 
26
  # Updated tone categories
27
- tone_categories = {
28
- "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis"],
29
- "Critical": ["corrupt", "oppression", "failure", "repression", "unjust"],
30
- "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
31
- "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
32
- "Informative": ["announcement", "event", "scheduled", "update", "details"],
33
- "Positive": ["progress", "unity", "hope", "victory", "solidarity"],
34
- "Urgent": ["urgent", "violence", "disappearances", "forced", "killing", "concern", "crisis"],
35
- "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust"],
36
- "Negative": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
37
- "Empowering": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
38
- "Neutral": ["announcement", "event", "scheduled", "update", "details", "protest on"],
39
- "Hopeful": ["progress", "unity", "hope", "victory", "together", "solidarity"]
40
- }
41
-
42
- # Updated frame categories (Limited to 4 selections)
43
- frame_categories = {
44
- "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
45
- "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
46
- "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
47
- "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
48
- "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
49
- "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
50
- "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
51
- "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
52
- "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
53
- "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
54
- "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
55
- "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
56
- "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
57
- "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
58
- "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
59
- }
60
-
61
- # Language detection
62
  def detect_language(text):
63
  try:
64
  return detect(text)
65
  except Exception:
66
  return "unknown"
67
 
68
- # NLP-based keyword matching with lemmatization
69
- def contains_keywords(text, keywords):
70
- words = word_tokenize(text.lower())
71
- lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
72
- return any(keyword in lemmatized_words for keyword in keywords)
73
-
74
- # Analyze tone based on predefined categories
75
  def analyze_tone(text):
76
- detected_tones = set()
77
- for category, keywords in tone_categories.items():
78
- if contains_keywords(text, keywords):
79
- detected_tones.add(category)
80
-
81
- if not detected_tones:
82
- model_result = tone_model(text, candidate_labels=list(tone_categories.keys()))
83
- detected_tones.update(model_result["labels"][:2])
84
 
85
- return list(detected_tones)
86
-
87
- # Extract frames based on predefined categories (Limit to 4)
88
  def extract_frames(text):
89
- detected_frames = set()
90
- for category, keywords in frame_categories.items():
91
- if contains_keywords(text, keywords):
92
- detected_frames.add(category)
93
-
94
- if not detected_frames:
95
- model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
96
- detected_frames.update(model_result["labels"][:4])
97
-
98
- return list(detected_frames)[:4] # Ensure no more than 4 frames are selected
99
 
100
  # Extract hashtags
101
  def extract_hashtags(text):
@@ -141,7 +87,7 @@ def generate_docx(output_data):
141
 
142
  return doc_io
143
 
144
- # Streamlit app
145
  st.title('AI-Powered Activism Message Analyzer')
146
 
147
  st.write("Enter the text to analyze or upload a DOCX file containing captions:")
 
3
  from langdetect import detect
4
  from transformers import pipeline
5
  import nltk
 
 
6
  from docx import Document
7
  import io
8
 
9
  # Download required NLTK resources
10
  nltk.download('punkt')
 
11
 
12
+ # Load AI models once to optimize performance
13
+ tone_model = pipeline("zero-shot-classification", model="facebook/roberta-large-mnli")
14
+ frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 
 
 
 
 
 
 
15
 
16
  # Updated tone categories
17
+ tone_categories = [
18
+ "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
19
+ "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
20
+ ]
21
+
22
+ # Updated frame categories
23
+ frame_categories = [
24
+ "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
25
+ "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
26
+ "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
27
+ ]
28
+
29
+ # Detect language
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def detect_language(text):
31
  try:
32
  return detect(text)
33
  except Exception:
34
  return "unknown"
35
 
36
+ # Analyze tone using RoBERTa model
 
 
 
 
 
 
37
  def analyze_tone(text):
38
+ model_result = tone_model(text, candidate_labels=tone_categories)
39
+ return model_result["labels"][:2] # Top 2 tone labels
 
 
 
 
 
 
40
 
41
+ # Extract frames using BART model
 
 
42
  def extract_frames(text):
43
+ model_result = frame_model(text, candidate_labels=frame_categories)
44
+ return model_result["labels"][:2] # Top 2 frame labels
 
 
 
 
 
 
 
 
45
 
46
  # Extract hashtags
47
  def extract_hashtags(text):
 
87
 
88
  return doc_io
89
 
90
+ # Streamlit app UI
91
  st.title('AI-Powered Activism Message Analyzer')
92
 
93
  st.write("Enter the text to analyze or upload a DOCX file containing captions:")