AA_Final2

Sleeping

App Files Community

ahm14 committed on Jan 29

Commit

706fc89

verified ·

1 Parent(s): e44cc10

Create app.py

Browse files

Files changed (1) hide show

app.py +201 -0

app.py ADDED Viewed

	@@ -0,0 +1,201 @@

+import streamlit as st
+import re
+from langdetect import detect
+from transformers import pipeline
+import nltk
+from docx import Document
+import io
+# Fetch the NLTK "punkt" tokenizer data only when it is not already installed.
+# Streamlit re-executes this script on every interaction, so an unconditional
+# nltk.download() would query the NLTK download server and print to the log on
+# each rerun.
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt', quiet=True)
+# Tone lexicon: each tone label maps to the keywords whose presence in a caption
+# selects that label. Several labels share most or all of their keywords
+# (Emotional/Urgent, Critical/Harsh, Somber/Negative, Motivational/Empowering,
+# Informative/Neutral, Positive/Hopeful), so they are usually reported together.
+tone_categories = dict(
+    Emotional=["urgent", "violence", "disappearances", "forced", "killing", "crisis"],
+    Critical=["corrupt", "oppression", "failure", "repression", "unjust"],
+    Somber=["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
+    Motivational=["rise", "resist", "mobilize", "inspire", "courage", "change"],
+    Informative=["announcement", "event", "scheduled", "update", "details"],
+    Positive=["progress", "unity", "hope", "victory", "solidarity"],
+    Urgent=["urgent", "violence", "disappearances", "forced", "killing", "concern", "crisis"],
+    Harsh=["corrupt", "oppression", "failure", "repression", "exploit", "unjust"],
+    Negative=["tragedy", "loss", "pain", "sorrow", "mourning", "grief"],
+    Empowering=["rise", "resist", "mobilize", "inspire", "courage", "change"],
+    Neutral=["announcement", "event", "scheduled", "update", "details", "protest on"],
+    Hopeful=["progress", "unity", "hope", "victory", "together", "solidarity"],
+)
+# Frame lexicon: each framing label maps to the keywords or short phrases whose
+# presence in a caption selects that frame. A keyword may appear under more
+# than one frame (e.g. "violence", "minorities", "pollution").
+frame_categories = {
+    "Human Rights & Justice": [
+        "rights", "law", "justice", "legal", "humanitarian",
+    ],
+    "Political & State Accountability": [
+        "government", "policy", "state", "corruption", "accountability",
+    ],
+    "Gender & Patriarchy": [
+        "gender", "women", "violence", "patriarchy", "equality",
+    ],
+    "Religious Freedom & Persecution": [
+        "religion", "persecution", "minorities", "intolerance", "faith",
+    ],
+    "Grassroots Mobilization": [
+        "activism", "community", "movement", "local", "mobilization",
+    ],
+    "Environmental Crisis & Activism": [
+        "climate", "deforestation", "water", "pollution", "sustainability",
+    ],
+    "Anti-Extremism & Anti-Violence": [
+        "extremism", "violence", "hate speech", "radicalism", "mob attack",
+    ],
+    "Social Inequality & Economic Disparities": [
+        "class privilege", "labor rights", "economic", "discrimination",
+    ],
+    "Activism & Advocacy": [
+        "justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech",
+    ],
+    "Systemic Oppression": [
+        "discrimination", "oppression", "minorities", "marginalized", "exclusion",
+    ],
+    "Intersectionality": [
+        "intersecting", "women", "minorities", "struggles", "multiple oppression",
+    ],
+    "Call to Action": [
+        "join us", "sign petition", "take action", "mobilize", "support movement",
+    ],
+    "Empowerment & Resistance": [
+        "empower", "resist", "challenge", "fight for", "stand up",
+    ],
+    "Climate Justice": [
+        "environment", "climate change", "sustainability", "biodiversity", "pollution",
+    ],
+    "Human Rights Advocacy": [
+        "human rights", "violations", "honor killing", "workplace discrimination", "law reform",
+    ],
+}
+# Detect language
+def detect_language(text):
+    """Return the language code langdetect assigns to ``text``, or "unknown".
+    Blank input (empty, whitespace-only or None) is reported as "unknown"
+    without calling the detector, so it no longer prints an error in the page.
+    Any failure raised by the detector is written to the Streamlit page and
+    also mapped to "unknown"; the function itself never raises for that.
+    """
+    if not text or not text.strip():
+        return "unknown"
+    try:
+        return detect(text)
+    except Exception as e:
+        # Deliberately broad: detection is best-effort and must not abort the analysis.
+        st.write(f"Error detecting language: {e}")
+        return "unknown"
+# Hugging Face checkpoint used by the zero-shot fallback of both classifiers.
+_ZERO_SHOT_MODEL = "facebook/bart-large-mnli"
+@st.cache_resource
+def _load_zero_shot_classifier():
+    """Build the zero-shot pipeline once; Streamlit caches it across calls and reruns."""
+    return pipeline("zero-shot-classification", model=_ZERO_SHOT_MODEL)
+def _classify_by_keywords(text, categories, fallback_top_n=2):
+    """Return the labels of ``categories`` that apply to ``text``.
+    Args:
+        text: caption to classify.
+        categories: mapping of label -> list of keywords/phrases.
+        fallback_top_n: how many model-ranked labels to keep when no keyword matches.
+    Returns:
+        A list of labels in no particular order (it is built from a set).
+        Blank text yields an empty list and never reaches the model.
+    """
+    # Lower-case once instead of once per keyword.
+    lowered = text.lower()
+    # NOTE: this is a plain substring test, so a keyword also matches inside a
+    # longer word or hashtag (e.g. "event" in "prevent", "violence" in "#stopviolence").
+    matched = {
+        label
+        for label, keywords in categories.items()
+        if any(keyword in lowered for keyword in keywords)
+    }
+    if not matched and text.strip():
+        # No keyword hit: let the zero-shot model rank every label and keep the
+        # first few entries of the "labels" list it returns.
+        ranking = _load_zero_shot_classifier()(text, candidate_labels=list(categories))
+        matched.update(ranking["labels"][:fallback_top_n])
+    return list(matched)
+# Analyze tone based on predefined categories
+def analyze_tone(text):
+    """Return the tone labels from ``tone_categories`` detected in ``text``."""
+    return _classify_by_keywords(text, tone_categories)
+# Extract hashtags
+def extract_hashtags(text):
+    """Return every ``#word`` token in ``text``, in order of appearance (duplicates kept)."""
+    return re.findall(r"#\w+", text)
+# Extract frames based on predefined categories
+def extract_frames(text):
+    """Return the framing labels from ``frame_categories`` detected in ``text``."""
+    return _classify_by_keywords(text, frame_categories)
+# Extract captions from DOCX file based on "Post X"
+def extract_captions_from_docx(docx_file):
+    """Group the paragraphs of a DOCX file into captions keyed by their "Post N" heading.
+    Args:
+        docx_file: path or file-like object accepted by ``Document``.
+    Returns:
+        dict mapping each heading paragraph (text starting with "Post <digits>",
+        case-insensitive) to the non-blank paragraphs that follow it, joined
+        with single spaces. Headings with no text after them are omitted, and
+        paragraphs before the first heading are ignored. If the same heading
+        text occurs twice, the later occurrence replaces the earlier one.
+    """
+    captions = {}
+    current_post = None
+    for para in Document(docx_file).paragraphs:
+        text = para.text.strip()
+        if not text:
+            # Blank paragraphs would add stray spaces to the caption and let a
+            # post made only of blank lines through as an empty caption.
+            continue
+        if re.match(r"Post \d+", text, re.IGNORECASE):
+            current_post = text
+            captions[current_post] = []
+        elif current_post:
+            captions[current_post].append(text)
+    return {post: " ".join(lines) for post, lines in captions.items() if lines}
+# Build the downloadable report: an in-memory DOCX holding every analysed caption
+def generate_docx(output_data):
+    """Render the analysis results as a Word document and return it as a BytesIO.
+    Args:
+        output_data: mapping of caption title -> result record with the keys
+            'Full Caption', 'Language', 'Tone of Caption', 'Hashtag Count',
+            'Hashtags' and 'Frames'.
+    Returns:
+        io.BytesIO positioned at offset 0, containing the saved document.
+    """
+    report = Document()
+    report.add_heading('Activism Message Analysis', 0)
+    for number, (title, analysis) in enumerate(output_data.items(), start=1):
+        # One numbered section per caption: quoted text first, then the summary lines.
+        report.add_heading(f"{number}. {title}", level=1)
+        report.add_paragraph("Full Caption:")
+        report.add_paragraph(analysis['Full Caption'], style="Quote")
+        summary_lines = (
+            f"Language: {analysis['Language']}",
+            f"Tone of Caption: {', '.join(analysis['Tone of Caption'])}",
+            f"Number of Hashtags: {analysis['Hashtag Count']}",
+            f"Hashtags Found: {', '.join(analysis['Hashtags'])}",
+        )
+        for line in summary_lines:
+            report.add_paragraph(line)
+        report.add_heading('Frames:', level=2)
+        for frame_label in analysis['Frames']:
+            report.add_paragraph(frame_label)
+    buffer = io.BytesIO()
+    report.save(buffer)
+    # Rewind so the caller reads the document from its first byte.
+    buffer.seek(0)
+    return buffer
+# Streamlit app
+def _analyze_caption(text):
+    """Run every analysis step on one caption and return its result record."""
+    language = detect_language(text)
+    tone = analyze_tone(text)
+    hashtags = extract_hashtags(text)
+    frames = extract_frames(text)
+    return {
+        'Full Caption': text,
+        'Language': language,
+        'Tone of Caption': tone,
+        'Hashtags': hashtags,
+        'Hashtag Count': len(hashtags),
+        'Frames': frames
+    }
+st.title('AI-Powered Activism Message Analyzer with Intersectionality')
+st.write("Enter the text to analyze or upload a DOCX file containing captions:")
+# Text Input
+input_text = st.text_area("Input Text", height=200)
+# File Upload
+uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
+# Results keyed by caption title, in the order they were analysed
+output_data = {}
+# Whitespace-only input carries nothing to analyse, so it is treated as empty.
+if input_text and input_text.strip():
+    output_data["Manual Input"] = _analyze_caption(input_text)
+    st.success("Analysis completed for text input.")
+if uploaded_file:
+    try:
+        captions = extract_captions_from_docx(uploaded_file)
+    except Exception as exc:
+        # UI boundary: an unreadable upload is reported in the page instead of
+        # crashing the app with a traceback.
+        st.error(f"Could not read the uploaded DOCX file: {exc}")
+    else:
+        for caption, text in captions.items():
+            output_data[caption] = _analyze_caption(text)
+        if captions:
+            st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
+        else:
+            st.warning('No "Post N" captions were found in the DOCX file.')
+# Display results
+if output_data:
+    with st.expander("Generated Output"):
+        st.subheader("Analysis Results")
+        for index, (caption, result) in enumerate(output_data.items(), start=1):
+            st.write(f"### {index}. {caption}")
+            st.write("**Full Caption:**")
+            st.write(f"> {result['Full Caption']}")
+            st.write(f"**Language**: {result['Language']}")
+            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
+            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
+            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
+            st.write("**Frames**:")
+            for frame in result['Frames']:
+                st.write(f"- {frame}")
+    # generate_docx always returns a BytesIO object, so no truthiness check is needed.
+    st.download_button(
+        label="Download Analysis as DOCX",
+        data=generate_docx(output_data),
+        file_name="activism_message_analysis.docx",
+        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    )