"""Streamlit app that analyzes activism captions for language, tone, frames,
and hashtags, and exports the results as a DOCX report."""

import io
import re

import streamlit as st
from docx import Document
from langdetect import detect
from transformers import pipeline


# Load the AI models once. st.cache_resource keeps the pipelines in memory
# across Streamlit reruns, so they are not reloaded on every interaction.
@st.cache_resource
def load_models():
    try:
        tone = pipeline("zero-shot-classification", model="cross-encoder/nli-deberta-v3-large")
    except OSError:
        st.error("Failed to load tone analysis model. Please check internet connection or model availability.")
        tone = None
    try:
        frame = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    except OSError:
        st.error("Failed to load frame classification model. Please check internet connection or model availability.")
        frame = None
    return tone, frame

tone_model, frame_model = load_models()

# Tone categories for zero-shot classification
tone_categories = [
    "Emotional & Urgent",
    "Harsh & Critical",
    "Negative & Somber",
    "Empowering & Motivational",
    "Neutral & Informative",
    "Hopeful & Positive"
]

# Frame categories for zero-shot classification
frame_categories = [
    "Human Rights & Justice",
    "Political & State Accountability",
    "Gender & Patriarchy",
    "Religious Freedom & Persecution",
    "Grassroots Mobilization",
    "Environmental Crisis & Activism",
    "Anti-Extremism & Anti-Violence",
    "Social Inequality & Economic Disparities"
]


# Detect the language of the text; langdetect raises on empty or ambiguous input
def detect_language(text):
    try:
        return detect(text)
    except Exception:
        return "unknown"


# Analyze tone using the DeBERTa zero-shot model
def analyze_tone(text):
    if tone_model is None:  # model failed to load at startup
        return ["Unknown"]
    try:
        model_result = tone_model(text, candidate_labels=tone_categories)
        return model_result["labels"][:2]  # Top 2 tone labels
    except Exception as e:
        st.error(f"Error analyzing tone: {e}")
        return ["Unknown"]


# Extract frames using the BART zero-shot model
def extract_frames(text):
    if frame_model is None:  # model failed to load at startup
        return ["Unknown"]
    try:
        model_result = frame_model(text, candidate_labels=frame_categories)
        return model_result["labels"][:2]  # Top 2 frame labels
    except Exception as e:
        st.error(f"Error extracting frames: {e}")
        return ["Unknown"]


# Extract hashtags
def extract_hashtags(text):
    return re.findall(r"#\w+", text)


# Extract captions from a DOCX file: a paragraph like "Post 1" starts a new
# post, and the following non-empty paragraphs form that post's caption.
def extract_captions_from_docx(docx_file):
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post and text:  # skip blank paragraphs
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}


# Generate a DOCX report in memory
def generate_docx(output_data):
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)
    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")
        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
        doc.add_heading('Frames:', level=2)
        for frame in result['Frames']:
            doc.add_paragraph(frame)
    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)
    return doc_io


# Streamlit app UI
st.title('AI-Powered Activism Message Analyzer')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Collected analysis results, keyed by post name
output_data = {}

if input_text:
    language = detect_language(input_text)
    tone = analyze_tone(input_text)
    hashtags = extract_hashtags(input_text)
    frames = extract_frames(input_text)
    output_data["Manual Input"] = {
        'Full Caption': input_text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames
    }
    st.success("Analysis completed for text input.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        language = detect_language(text)
        tone = analyze_tone(text)
        hashtags = extract_hashtags(text)
        frames = extract_frames(text)
        output_data[caption] = {
            'Full Caption': text,
            'Language': language,
            'Tone of Caption': tone,
            'Hashtags': hashtags,
            'Hashtag Count': len(hashtags),
            'Frames': frames
        }
    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")

# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language**: {result['Language']}")
            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames**:")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    docx_file = generate_docx(output_data)
    st.download_button(
        label="Download Analysis as DOCX",
        data=docx_file,
        file_name="activism_message_analysis.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )