import streamlit as st
import re
from langdetect import detect
from transformers import pipeline
import nltk
from docx import Document
import io

# Download required NLTK resources
nltk.download('punkt')
# Load AI models once to optimize performance.
# st.cache_resource keeps the pipelines in memory across Streamlit reruns,
# so they are not reloaded on every user interaction. If loading fails,
# the corresponding model is set to None and handled downstream.
@st.cache_resource
def load_models():
    try:
        tone = pipeline("zero-shot-classification", model="cross-encoder/nli-deberta-v3-large")
    except OSError:
        st.error("Failed to load tone analysis model. Please check internet connection or model availability.")
        tone = None
    try:
        frame = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    except OSError:
        st.error("Failed to load frame classification model. Please check internet connection or model availability.")
        frame = None
    return tone, frame

tone_model, frame_model = load_models()
# Tone categories used as zero-shot candidate labels
tone_categories = [
    "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
    "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
]

# Frame categories used as zero-shot candidate labels
frame_categories = [
    "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
    "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
    "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
]
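
# Note: for a single input string, a zero-shot pipeline call returns a dict
# of the form {"sequence": ..., "labels": [...], "scores": [...]}, with the
# labels sorted by descending score, so result["labels"][:2] below selects
# the two highest-confidence categories. By default the scores form a
# softmax over all candidates (one best label); if a caption can carry
# several frames at once, passing multi_label=True would score each label
# independently instead.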
# Detect the language of the caption text
def detect_language(text):
    try:
        return detect(text)
    except Exception:
        return "unknown"
# Analyze tone using the DeBERTa zero-shot model
def analyze_tone(text):
    if tone_model is None:  # model failed to load at startup
        return ["Unknown"]
    try:
        model_result = tone_model(text, candidate_labels=tone_categories)
        return model_result["labels"][:2]  # Top 2 tone labels
    except Exception as e:
        st.error(f"Error analyzing tone: {e}")
        return ["Unknown"]
# Extract frames using the BART zero-shot model
def extract_frames(text):
    if frame_model is None:  # model failed to load at startup
        return ["Unknown"]
    try:
        model_result = frame_model(text, candidate_labels=frame_categories)
        return model_result["labels"][:2]  # Top 2 frame labels
    except Exception as e:
        st.error(f"Error extracting frames: {e}")
        return ["Unknown"]
# Extract hashtags (# followed by word characters)
def extract_hashtags(text):
    return re.findall(r"#\w+", text)
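
# e.g. extract_hashtags("Stand with us! #JusticeNow #Rights2024")
# returns ["#JusticeNow", "#Rights2024"].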
# Extract captions from a DOCX file
def extract_captions_from_docx(docx_file):
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post and text:  # skip empty paragraphs between posts
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}
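
# The uploaded document is assumed to be laid out as one "Post N" marker
# paragraph followed by that post's caption text, e.g.:
#   Post 1
#   Stand with us today! #JusticeNow
#   Post 2
#   ...
# which would yield {"Post 1": "Stand with us today! #JusticeNow", ...}.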
# Generate a DOCX report in memory
def generate_docx(output_data):
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)
    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")
        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
        doc.add_heading('Frames:', level=2)
        for frame in result['Frames']:
            doc.add_paragraph(frame)
    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)
    return doc_io
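
# Writing into an io.BytesIO buffer keeps the report entirely in memory;
# st.download_button accepts a file-like object directly, so nothing is
# written to disk on the server.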
# Streamlit app UI
st.title('AI-Powered Activism Message Analyzer')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Initialize output dictionary
output_data = {}
if input_text:
    language = detect_language(input_text)
    tone = analyze_tone(input_text)
    hashtags = extract_hashtags(input_text)
    frames = extract_frames(input_text)
    output_data["Manual Input"] = {
        'Full Caption': input_text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames
    }
    st.success("Analysis completed for text input.")
if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        language = detect_language(text)
        tone = analyze_tone(text)
        hashtags = extract_hashtags(text)
        frames = extract_frames(text)
        output_data[caption] = {
            'Full Caption': text,
            'Language': language,
            'Tone of Caption': tone,
            'Hashtags': hashtags,
            'Hashtag Count': len(hashtags),
            'Frames': frames
        }
    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language:** {result['Language']}")
            st.write(f"**Tone of Caption:** {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags:** {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames:**")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    docx_file = generate_docx(output_data)
    if docx_file:
        st.download_button(
            label="Download Analysis as DOCX",
            data=docx_file,
            file_name="activism_message_analysis.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
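
# To run locally (assuming this file is saved as app.py and a PyTorch
# backend is available for the transformers pipelines):
#   pip install streamlit langdetect transformers torch nltk python-docx
#   streamlit run app.py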