ahm14 committed on
Commit
706fc89
·
verified ·
1 Parent(s): e44cc10

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -0
app.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import re
3
+ from langdetect import detect
4
+ from transformers import pipeline
5
+ import nltk
6
+ from docx import Document
7
+ import io
8
+
9
# Download required NLTK resources (runs every time the script executes).
# NOTE(review): no code visible in this file calls an NLTK tokenizer; confirm
# the 'punkt' download is still needed before keeping this start-up cost.
nltk.download('punkt')
11
+
12
# Updated tone categories: tone label -> keywords that trigger that label.
# analyze_tone() tests each keyword as a substring of the lower-cased caption,
# so multi-word entries such as "protest on" work, and a keyword also matches
# inside longer words. Several labels share keywords (e.g. "Emotional" and
# "Urgent"), so a single keyword can produce more than one tone.
tone_categories = {
    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis"],
    "Critical": ["corrupt", "oppression", "failure", "repression", "unjust"],
    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
    "Informative": ["announcement", "event", "scheduled", "update", "details"],
    "Positive": ["progress", "unity", "hope", "victory", "solidarity"],
    "Urgent": ["urgent", "violence", "disappearances", "forced", "killing", "concern", "crisis"],
    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust"],
    "Negative": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
    "Empowering": ["rise", "resist", "mobilize", "inspire", "courage", "change"],
    "Neutral": ["announcement", "event", "scheduled", "update", "details", "protest on"],
    "Hopeful": ["progress", "unity", "hope", "victory", "together", "solidarity"],
}
27
+
28
# Updated frame categories: frame label -> keywords/phrases that trigger it.
# extract_frames() tests each entry as a substring of the lower-cased caption,
# so phrases such as "freedom of speech" work, and short keywords also match
# inside longer words. Keywords are shared between frames (e.g. "violence",
# "minorities"), so one caption commonly maps to several frames.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"],
}
46
+
47
# Detect language
def detect_language(text):
    """Return the language that langdetect's ``detect`` reports for *text*.

    Args:
        text: The caption to inspect.

    Returns:
        Whatever ``detect(text)`` returns, or the string ``"unknown"`` when
        *text* is empty/whitespace-only or when detection raises. In the
        latter case the error is also written to the Streamlit page.
    """
    # Blank input has nothing to detect; return the fallback directly instead
    # of calling the detector and printing an error for it.
    if not text or not text.strip():
        return "unknown"

    try:
        return detect(text)
    except Exception as e:  # UI boundary: report the problem, never crash the app
        st.write(f"Error detecting language: {e}")
        return "unknown"
54
+
55
# Analyze tone based on predefined categories
def analyze_tone(text):
    """Return the tone labels that apply to *text*.

    A label from ``tone_categories`` applies when any of its keywords occurs
    as a substring of the lower-cased text. When no keyword matches at all,
    the text is passed to a zero-shot classifier with the tone labels as
    candidates and the first two labels it returns are used instead.

    Args:
        text: The caption to analyse.

    Returns:
        A list of tone labels. Keyword matches come back in the order the
        labels are declared in ``tone_categories``; classifier results keep
        the classifier's own order. Blank or missing text yields ``[]``.
    """
    # Nothing to match and nothing meaningful to classify: skip the model.
    if not text or not text.strip():
        return []

    lowered = text.lower()  # lower-case once instead of once per category
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]

    if not detected_tones:
        # NOTE(review): the pipeline is constructed on every fallback call;
        # consider sharing one instance if many captions reach this branch.
        tone_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        model_result = tone_model(text, candidate_labels=list(tone_categories.keys()))
        detected_tones = list(model_result["labels"][:2])

    return detected_tones
68
+
69
# Extract hashtags
def extract_hashtags(text):
    """Return every hashtag found in *text*, in order of appearance.

    A hashtag is a ``#`` followed by one or more word characters (``\\w``),
    so it stops at the first hyphen, space, or punctuation mark. Repeated
    hashtags are returned once per occurrence.

    Args:
        text: The caption to scan; ``None`` or ``""`` yields ``[]``.

    Returns:
        A list of hashtag strings, each including its leading ``#``.
    """
    if not text:
        return []
    return re.findall(r"#\w+", text)
72
+
73
# Extract frames based on predefined categories
def extract_frames(text):
    """Return the frame labels that apply to *text*.

    A label from ``frame_categories`` applies when any of its keywords or
    phrases occurs as a substring of the lower-cased text. When nothing
    matches, the text is passed to a zero-shot classifier with the frame
    labels as candidates and the first two labels it returns are used.

    Args:
        text: The caption to analyse.

    Returns:
        A list of frame labels. Keyword matches come back in the order the
        labels are declared in ``frame_categories``; classifier results keep
        the classifier's own order. Blank or missing text yields ``[]``.
    """
    # Nothing to match and nothing meaningful to classify: skip the model.
    if not text or not text.strip():
        return []

    lowered = text.lower()  # lower-case once instead of once per category
    detected_frames = [
        category
        for category, keywords in frame_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]

    if not detected_frames:
        # NOTE(review): the pipeline is constructed on every fallback call;
        # consider sharing one instance if many captions reach this branch.
        frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
        detected_frames = list(model_result["labels"][:2])

    return detected_frames
86
+
87
# Extract captions from DOCX file based on "Post X"
def extract_captions_from_docx(docx_file):
    """Group the paragraphs of a DOCX file into captions keyed by post header.

    A paragraph whose stripped text starts with "Post <digits>" (case
    insensitive) opens a new post and its full text becomes the key. Every
    following non-blank paragraph is collected under that key until the next
    header. Paragraphs before the first header are ignored. If the same
    header text appears again, the lines collected for it earlier are
    replaced.

    Args:
        docx_file: Anything ``Document(...)`` accepts (the app passes the
            uploaded file object).

    Returns:
        A dict mapping each header to its caption paragraphs joined with a
        single space. Headers with no caption text are omitted.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Blank paragraphs carry no caption text; collecting them would
            # add stray spaces and keep posts that are effectively empty.
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post:
            captions[current_post].append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}
101
+
102
# Generate a DOCX file in-memory with full captions
def generate_docx(output_data):
    """Build the analysis report as an in-memory DOCX file.

    Args:
        output_data: Dict mapping a caption label to its result dict. Each
            result must provide the keys 'Full Caption', 'Language',
            'Tone of Caption', 'Hashtag Count', 'Hashtags' and 'Frames'
            ('Tone of Caption', 'Hashtags' and 'Frames' as iterables of str).

    Returns:
        An ``io.BytesIO`` holding the saved document, rewound to position 0
        so it can be read or handed to a download widget directly.
    """
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)

    # One numbered section per analysed caption, in insertion order.
    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")

        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")

        doc.add_heading('Frames:', level=2)
        for frame in result['Frames']:
            doc.add_paragraph(frame)

    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)  # rewind so consumers read from the start of the file

    return doc_io
126
+
127
# Streamlit app
st.title('AI-Powered Activism Message Analyzer with Intersectionality')

st.write("Enter the text to analyze or upload a DOCX file containing captions:")

# Text Input
input_text = st.text_area("Input Text", height=200)

# File Upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Initialize output dictionary: caption label -> analysis result dict.
# It starts empty on every execution of the script and is filled below from
# whichever inputs are currently present.
output_data = {}
140
+
141
# Analyse the text typed into the text area. Whitespace-only input is treated
# as "nothing entered" so blank text is never sent through the analysis.
if input_text and input_text.strip():
    language = detect_language(input_text)
    tone = analyze_tone(input_text)
    hashtags = extract_hashtags(input_text)
    frames = extract_frames(input_text)

    output_data["Manual Input"] = {
        'Full Caption': input_text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames
    }

    st.success("Analysis completed for text input.")
157
+
158
# Analyse every "Post X" caption found in the uploaded DOCX file.
if uploaded_file:
    try:
        captions = extract_captions_from_docx(uploaded_file)
    except Exception as e:  # UI boundary: an unreadable upload must not crash the app
        st.error(f"Could not read the DOCX file: {e}")
        captions = None

    if captions is not None:
        for caption, text in captions.items():
            language = detect_language(text)
            tone = analyze_tone(text)
            hashtags = extract_hashtags(text)
            frames = extract_frames(text)

            output_data[caption] = {
                'Full Caption': text,
                'Language': language,
                'Tone of Caption': tone,
                'Hashtags': hashtags,
                'Hashtag Count': len(hashtags),
                'Frames': frames
            }

        if captions:
            st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
        else:
            # Reporting "0 posts analysed" as a success would be misleading.
            st.warning("No \"Post X\" captions were found in the DOCX file.")
176
+
177
# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        # One numbered section per analysed caption, in insertion order.
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language**: {result['Language']}")
            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames**:")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    # NOTE(review): the report and download button are assumed to sit under
    # `if output_data:` (outside the expander), so a download is only offered
    # when there is something to report — confirm the intended nesting.
    docx_file = generate_docx(output_data)

    if docx_file:
        st.download_button(
            label="Download Analysis as DOCX",
            data=docx_file,
            file_name="activism_message_analysis.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )