# Spaces: Sleeping (hosting-page status text captured with the source; not part of the program)
import io
import re

import nltk
import streamlit as st
from docx import Document
from langdetect import DetectorFactory, detect
from transformers import pipeline
# Download required NLTK resources
# Look for a local copy first so the download (and its network round-trip)
# only happens when the 'punkt' data is actually missing.
# NOTE(review): nothing in the visible code calls an NLTK tokenizer — confirm
# this resource is still needed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
# Updated tone categories
# Maps each tone label to the lowercase keywords/phrases associated with it.
# NOTE(review): "Somber"/"Negative" and "Motivational"/"Empowering" hold
# identical keyword lists, and "Urgent", "Harsh", "Neutral" and "Hopeful" are
# supersets of "Emotional", "Critical", "Informative" and "Positive" — confirm
# the overlap is intended.
tone_categories = {
    "Emotional": [
        "urgent", "violence", "disappearances", "forced", "killing", "crisis",
    ],
    "Critical": [
        "corrupt", "oppression", "failure", "repression", "unjust",
    ],
    "Somber": [
        "tragedy", "loss", "pain", "sorrow", "mourning", "grief",
    ],
    "Motivational": [
        "rise", "resist", "mobilize", "inspire", "courage", "change",
    ],
    "Informative": [
        "announcement", "event", "scheduled", "update", "details",
    ],
    "Positive": [
        "progress", "unity", "hope", "victory", "solidarity",
    ],
    "Urgent": [
        "urgent", "violence", "disappearances", "forced", "killing",
        "concern", "crisis",
    ],
    "Harsh": [
        "corrupt", "oppression", "failure", "repression", "exploit", "unjust",
    ],
    "Negative": [
        "tragedy", "loss", "pain", "sorrow", "mourning", "grief",
    ],
    "Empowering": [
        "rise", "resist", "mobilize", "inspire", "courage", "change",
    ],
    "Neutral": [
        "announcement", "event", "scheduled", "update", "details",
        "protest on",
    ],
    "Hopeful": [
        "progress", "unity", "hope", "victory", "together", "solidarity",
    ],
}
# Updated frame categories
# Maps each frame label to the lowercase keywords/phrases associated with it.
# Some keywords (e.g. "violence", "rights", "minorities") appear under more
# than one frame.
frame_categories = {
    "Human Rights & Justice": [
        "rights", "law", "justice", "legal", "humanitarian",
    ],
    "Political & State Accountability": [
        "government", "policy", "state", "corruption", "accountability",
    ],
    "Gender & Patriarchy": [
        "gender", "women", "violence", "patriarchy", "equality",
    ],
    "Religious Freedom & Persecution": [
        "religion", "persecution", "minorities", "intolerance", "faith",
    ],
    "Grassroots Mobilization": [
        "activism", "community", "movement", "local", "mobilization",
    ],
    "Environmental Crisis & Activism": [
        "climate", "deforestation", "water", "pollution", "sustainability",
    ],
    "Anti-Extremism & Anti-Violence": [
        "extremism", "violence", "hate speech", "radicalism", "mob attack",
    ],
    "Social Inequality & Economic Disparities": [
        "class privilege", "labor rights", "economic", "discrimination",
    ],
    "Activism & Advocacy": [
        "justice", "rights", "demand", "protest", "march", "campaign",
        "freedom of speech",
    ],
    "Systemic Oppression": [
        "discrimination", "oppression", "minorities", "marginalized",
        "exclusion",
    ],
    "Intersectionality": [
        "intersecting", "women", "minorities", "struggles",
        "multiple oppression",
    ],
    "Call to Action": [
        "join us", "sign petition", "take action", "mobilize",
        "support movement",
    ],
    "Empowerment & Resistance": [
        "empower", "resist", "challenge", "fight for", "stand up",
    ],
    "Climate Justice": [
        "environment", "climate change", "sustainability", "biodiversity",
        "pollution",
    ],
    "Human Rights Advocacy": [
        "human rights", "violations", "honor killing",
        "workplace discrimination", "law reform",
    ],
}
# Detect language
def detect_language(text):
    """Return the language code that langdetect assigns to *text*.

    Args:
        text: The caption to inspect.

    Returns:
        The value returned by ``langdetect.detect``, or ``"unknown"`` when
        the text is blank or detection fails. Failures are also written to
        the Streamlit page.
    """
    # Blank text carries nothing to detect; answer directly instead of
    # calling the detector and surfacing its error to the user.
    if not text or not text.strip():
        return "unknown"

    # langdetect is randomised unless a seed is set; pinning it keeps repeated
    # runs on the same caption from reporting different languages.
    DetectorFactory.seed = 0
    try:
        return detect(text)
    except Exception as e:
        # Best-effort: report the problem in the page and keep the app running.
        st.write(f"Error detecting language: {e}")
        return "unknown"
# Zero-shot model used as a fallback when no keyword matches
_ZERO_SHOT_MODEL = "facebook/bart-large-mnli"
_zero_shot_classifier = None


def _get_zero_shot_classifier():
    """Return the shared zero-shot pipeline, building it on first use.

    Building the pipeline is the expensive step, so it is done once and the
    same object serves both the tone and the frame fallback.
    """
    global _zero_shot_classifier
    if _zero_shot_classifier is None:
        _zero_shot_classifier = pipeline(
            "zero-shot-classification", model=_ZERO_SHOT_MODEL
        )
    return _zero_shot_classifier


def _match_categories(text, categories, fallback_top_k=2):
    """Label *text* with the categories whose keywords it contains.

    Args:
        text: Non-blank text to label.
        categories: Mapping of label -> list of lowercase keywords/phrases.
        fallback_top_k: How many model labels to keep when no keyword matches.

    Returns:
        Matching labels in the mapping's own order; if nothing matches, the
        first ``fallback_top_k`` labels returned by the zero-shot model.
    """
    # Lowercase once rather than once per keyword.
    lowered = text.lower()
    # NOTE(review): this is plain substring matching, so a keyword also hits
    # inside longer words (e.g. "rise" in "surprise") — confirm that is wanted.
    matched = [
        label
        for label, keywords in categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    if matched:
        return matched

    model_result = _get_zero_shot_classifier()(
        text, candidate_labels=list(categories)
    )
    return list(model_result["labels"][:fallback_top_k])


# Analyze tone based on predefined categories
def analyze_tone(text):
    """Return the tone labels detected in *text*.

    Every tone in ``tone_categories`` with a keyword present in the text is
    returned, in the order the tones are declared. When none matches, the
    first two labels from the zero-shot model are returned instead. Blank
    text yields an empty list without consulting the model.
    """
    if not text or not text.strip():
        return []
    return _match_categories(text, tone_categories)


# Extract hashtags
def extract_hashtags(text):
    """Return every ``#word`` token found in *text*, in order of appearance."""
    return re.findall(r"#\w+", text)


# Extract frames based on predefined categories
def extract_frames(text):
    """Return the frame labels detected in *text*.

    Works like ``analyze_tone`` but against ``frame_categories``: keyword
    matches in declaration order, otherwise the first two labels from the
    zero-shot model, and an empty list for blank text.
    """
    if not text or not text.strip():
        return []
    return _match_categories(text, frame_categories)
# Extract captions from DOCX file based on "Post X"
def extract_captions_from_docx(docx_file):
    """Collect the caption text that follows each "Post N" heading in a DOCX.

    Args:
        docx_file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        Dict mapping each heading paragraph (text starting with "Post" and a
        number, case-insensitive) to the non-blank paragraphs after it, joined
        with single spaces. Headings with no caption text are omitted, and
        paragraphs before the first heading are ignored.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        # Spacer paragraphs would add stray spaces to the caption and could
        # leave a heading with a caption made only of whitespace.
        if not text:
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            # setdefault keeps text already collected if a heading repeats.
            captions.setdefault(current_post, [])
        elif current_post:
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}
# Generate a DOCX file in-memory with full captions
def generate_docx(output_data):
    """Build the analysis report as a Word document held in memory.

    Args:
        output_data: Dict mapping a caption title to its analysis dict, which
            is read for the keys 'Full Caption', 'Language',
            'Tone of Caption', 'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        An ``io.BytesIO`` holding the saved document, rewound to the start.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    position = 0
    for title, analysis in output_data.items():
        position += 1
        report.add_heading(f"{position}. {title}", level=1)

        # The caption itself, set apart with the "Quote" paragraph style.
        report.add_paragraph("Full Caption:")
        report.add_paragraph(analysis['Full Caption'], style="Quote")

        # One summary paragraph per line, in this order.
        tones = ', '.join(analysis['Tone of Caption'])
        tags = ', '.join(analysis['Hashtags'])
        summary_lines = (
            f"Language: {analysis['Language']}",
            f"Tone of Caption: {tones}",
            f"Number of Hashtags: {analysis['Hashtag Count']}",
            f"Hashtags Found: {tags}",
        )
        for line in summary_lines:
            report.add_paragraph(line)

        report.add_heading('Frames:', level=2)
        for frame in analysis['Frames']:
            report.add_paragraph(frame)

    # Save into a buffer and rewind so the caller can read it from the top.
    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)
    return buffer
# Streamlit app
def _analyze_caption(text):
    """Run every analyzer on *text* and bundle the results for display/export."""
    language = detect_language(text)
    tone = analyze_tone(text)
    hashtags = extract_hashtags(text)
    frames = extract_frames(text)
    return {
        'Full Caption': text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames,
    }


st.title('AI-Powered Activism Message Analyzer with Intersectionality')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")

# Text Input
input_text = st.text_area("Input Text", height=200)

# File Upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Initialize output dictionary (caption title -> analysis dict)
output_data = {}

# Whitespace-only input is treated as "nothing entered".
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Analysis completed for text input.")

if uploaded_file:
    try:
        captions = extract_captions_from_docx(uploaded_file)
    except Exception as e:
        # An unreadable upload should produce a message, not a crashed page.
        st.error(f"Could not read the uploaded DOCX file: {e}")
    else:
        for caption, text in captions.items():
            output_data[caption] = _analyze_caption(text)
        st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")

# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language**: {result['Language']}")
            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames**:")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    # NOTE(review): the original indentation was lost; the download button is
    # placed after the expander but still only shown when there are results —
    # confirm against the intended layout.
    docx_file = generate_docx(output_data)
    st.download_button(
        label="Download Analysis as DOCX",
        data=docx_file,
        file_name="activism_message_analysis.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )