# ExcelWordCloud / app.py
# Hugging Face Space by sumit-ai-ml; commit 1f041f3 ("Update app.py", verified), 2.91 kB.
import os
import tempfile
from collections import Counter

import gradio as gr
import nltk
import pandas as pd
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud, STOPWORDS
# Ensure the NLTK data used by this module is downloaded.
# word_tokenize loads 'punkt_tab' on NLTK >= 3.8.2 and 'punkt' on older
# releases, so both are fetched to keep tokenization working on either.
nltk.download('opinion_lexicon')
nltk.download('punkt')
nltk.download('punkt_tab')

# Load the positive and negative opinion words once, as sets, so that the
# per-token membership tests done later are O(1).
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())
class SimpleGroupedColorFunc:
    """Callable that picks a word's color from the group the word belongs to.

    ``color_to_words`` maps a color to an iterable of words that should be
    drawn in that color. Words that are in none of the groups get
    ``default_color``. When a word is listed under several colors, the color
    visited last while iterating ``color_to_words`` wins.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color

        # Invert the color -> words mapping into a flat word -> color lookup.
        lookup = {}
        for color, words in color_to_words.items():
            for word in words:
                lookup[word] = color
        self.words_to_color = lookup

    def __call__(self, word, **kwargs):
        """Return the color for ``word``; extra keyword arguments are ignored."""
        try:
            return self.words_to_color[word]
        except KeyError:
            return self.default_color
def generate_wordcloud(excel_file, column_name):
    """Render a sentiment-colored word cloud from one column of an Excel file.

    The text of ``column_name`` is lower-cased and tokenized, and only tokens
    found in the positive or negative opinion lexicon are kept. Positive
    words are drawn in green and negative words in red.

    Args:
        excel_file: The uploaded Excel file, in any form ``pd.read_excel``
            accepts (path or file-like object).
        column_name: Name of the column whose text is analyzed.

    Returns:
        Path to a temporary PNG file containing the word cloud. The file is
        created with ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column contains no positive or negative words.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    df = pd.read_excel(excel_file)

    # The output component is an image, so a problem has to be raised as an
    # error instead of being returned as a string (which would be treated as
    # an image path).
    if column_name not in df.columns:
        raise gr.Error("The specified column name does not exist in the Excel file.")

    text_data = ' '.join(df[column_name].dropna().astype(str))
    tokens = word_tokenize(text_data.lower())

    # Count the sentiment tokens directly. Feeding the counts to the word
    # cloud (instead of re-joined text) keeps each lexicon entry intact, so
    # every drawn word can be matched to its color; hyphenated entries are
    # not split and no bigram collocations are introduced.
    frequencies = Counter(
        token
        for token in tokens
        if (token in positive_words or token in negative_words)
        and token not in STOPWORDS
    )
    if not frequencies:
        raise gr.Error(
            "No positive or negative words were found in the specified column."
        )

    wordcloud = WordCloud(background_color='white', width=800, height=400)
    wordcloud.generate_from_frequencies(frequencies)

    # Map of colors to the words drawn in them; 'gray' is the fallback for a
    # word that is in neither list.
    color_to_words = {
        'green': positive_words,
        'red': negative_words,
    }
    grouped_color_func = SimpleGroupedColorFunc(color_to_words, 'gray')
    wordcloud.recolor(color_func=grouped_color_func)

    # Write through the already-open handle rather than re-opening the file
    # by name, and return only after the handle is closed so the PNG is
    # fully flushed before it is read.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        wordcloud.to_image().save(tmp, format='PNG')
    return tmp.name
# Gradio interface: an Excel upload plus a free-text column name go in,
# the rendered word cloud image comes out.
iface = gr.Interface(
    fn=generate_wordcloud,
    inputs=[
        gr.File(label="Upload Excel File"),
        gr.Text(label="Column Name"),
    ],
    outputs=gr.Image(label="Word Cloud"),
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()