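"""AI-Powered Activism Message Analyzer.

A Streamlit app that detects the language, tone, hashtags, and dominant
frames of activism captions entered manually or extracted from a DOCX
file, and exports the combined results as a DOCX report.
"""
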
import io
import re

import streamlit as st
from langdetect import detect
from transformers import pipeline
from docx import Document


# Cache the pipelines so Streamlit does not reload them on every rerun.
@st.cache_resource
def load_classifier(model_name):
    return pipeline("zero-shot-classification", model=model_name)


try:
    tone_model = load_classifier("cross-encoder/nli-deberta-v3-large")
except OSError:
    tone_model = None
    st.error("Failed to load the tone analysis model. Please check your internet connection or model availability.")

try:
    frame_model = load_classifier("facebook/bart-large-mnli")
except OSError:
    frame_model = None
    st.error("Failed to load the frame classification model. Please check your internet connection or model availability.")
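# Both checkpoints are sizeable downloads; if resources are limited, any
# smaller MNLI checkpoint from the Hugging Face Hub could be swapped in.
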
tone_categories = [
    "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
    "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
]

frame_categories = [
    "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
    "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
    "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
]
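
# A zero-shot call ranks every candidate label; the result is shaped like
# (scores illustrative, not real output):
#   tone_model("We must act now!", candidate_labels=tone_categories)
#   # -> {"sequence": "We must act now!",
#   #     "labels": ["Emotional & Urgent", ...], "scores": [0.62, ...]}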


def detect_language(text):
    """Return the ISO 639-1 code of the detected language, or 'unknown'."""
    try:
        return detect(text)
    except Exception:
        return "unknown"


def analyze_tone(text):
    """Return the two highest-scoring tone labels for the text."""
    if tone_model is None:
        return ["Unknown"]
    try:
        model_result = tone_model(text, candidate_labels=tone_categories)
        return model_result["labels"][:2]
    except Exception as e:
        st.error(f"Error analyzing tone: {e}")
        return ["Unknown"]


def extract_frames(text):
    """Return the two highest-scoring frame labels for the text."""
    if frame_model is None:
        return ["Unknown"]
    try:
        model_result = frame_model(text, candidate_labels=frame_categories)
        return model_result["labels"][:2]
    except Exception as e:
        st.error(f"Error extracting frames: {e}")
        return ["Unknown"]


def extract_hashtags(text):
    """Return every #hashtag token in the text, e.g. ['#justice', '#NoMore']."""
    return re.findall(r"#\w+", text)


def extract_captions_from_docx(docx_file):
    """Group DOCX paragraphs into captions keyed by their 'Post N' headings."""
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post and text:
            captions[current_post].append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}
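
# The parser above expects the uploaded DOCX to be laid out like:
#
#   Post 1
#   Caption text goes here... #hashtag
#   Post 2
#   ...
#
# yielding {"Post 1": "Caption text goes here... #hashtag", ...}.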


def generate_docx(output_data):
    """Build a DOCX report from the results and return it as a BytesIO buffer."""
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)

    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")

        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")

        doc.add_heading('Frames:', level=2)
        for frame in result['Frames']:
            doc.add_paragraph(frame)

    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)

    return doc_io
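
# st.download_button accepts a BytesIO directly, so the report is served
# from memory and never written to disk.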


st.title('AI-Powered Activism Message Analyzer')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")

input_text = st.text_area("Input Text", height=200)
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

output_data = {}


def analyze_caption(text):
    """Run language, tone, hashtag, and frame analysis for one caption."""
    hashtags = extract_hashtags(text)
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': analyze_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


if input_text.strip():
    output_data["Manual Input"] = analyze_caption(input_text)
    st.success("Analysis completed for text input.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = analyze_caption(text)
    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")

if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language:** {result['Language']}")
            st.write(f"**Tone of Caption:** {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags:** {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames:**")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    docx_file = generate_docx(output_data)

    st.download_button(
        label="Download Analysis as DOCX",
        data=docx_file,
        file_name="activism_message_analysis.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
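
# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py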