Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from wordcloud import WordCloud, STOPWORDS | |
from nltk.corpus import opinion_lexicon | |
from nltk.tokenize import word_tokenize | |
import nltk | |
import tempfile | |
import os | |
# Make sure the NLTK resources used by this module are available locally.
# 'opinion_lexicon' backs the positive/negative word sets built below.
# 'punkt' and 'punkt_tab' are tokenizer models for word_tokenize: newer NLTK
# releases look up 'punkt_tab', older ones 'punkt', so both are requested.
nltk.download('opinion_lexicon')
nltk.download('punkt')
nltk.download('punkt_tab')

# Sentiment vocabularies, stored as sets for O(1) membership tests.
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())
class SimpleGroupedColorFunc:
    """Callable that picks a color for a word from predefined word groups.

    Args:
        color_to_words: Mapping of color -> iterable of words that should be
            drawn in that color.
        default_color: Color returned for any word not present in a group.

    Attributes:
        color_to_words: The mapping passed to the constructor.
        default_color: The fallback color passed to the constructor.
        words_to_color: Inverted lookup (word -> color). If a word appears in
            several groups, the group iterated last wins.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color

        # Invert the mapping once so each later lookup is a single dict access.
        lookup = {}
        for color, words in color_to_words.items():
            lookup.update(dict.fromkeys(words, color))
        self.words_to_color = lookup

    def __call__(self, word, **kwargs):
        """Return the color for *word*; extra keyword arguments are ignored."""
        try:
            return self.words_to_color[word]
        except KeyError:
            return self.default_color
def generate_wordcloud(excel_file, column_name):
    """Render a sentiment-colored word cloud from one column of an Excel file.

    The non-null cells of the column are joined, lower-cased and tokenized.
    Only tokens found in the module-level ``positive_words`` or
    ``negative_words`` sets are kept; positive words are drawn green and
    negative words red.

    Args:
        excel_file: The upload supplied by the ``gr.File`` input; it is passed
            directly to ``pandas.read_excel``.
        column_name: Name of the column whose text is analysed.

    Returns:
        Path of a temporary PNG file holding the rendered word cloud. The file
        is created with ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column yields no words that can be plotted.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    # Read the Excel file from the uploaded file
    df = pd.read_excel(excel_file)

    # The output component is an image, so a plain string return value cannot
    # be displayed as a message; raise a Gradio error so the text is shown.
    if column_name not in df.columns:
        raise gr.Error("The specified column name does not exist in the Excel file.")

    text_data = ' '.join(df[column_name].dropna().astype(str))

    # Tokenize the text and keep only words carrying sentiment
    tokens = word_tokenize(text_data.lower())
    combined_tokens = [
        word for word in tokens
        if word in positive_words or word in negative_words
    ]
    if not combined_tokens:
        raise gr.Error("No positive or negative words were found in the specified column.")

    combined_text = ' '.join(combined_tokens)

    # collocations=False: the filtered tokens were not adjacent in the source
    # text, so bigrams built from them would be artifacts, and they would miss
    # the per-word color lookup and fall back to the default color.
    cloud_builder = WordCloud(
        stopwords=STOPWORDS,
        background_color='white',
        width=800,
        height=400,
        collocations=False,
    )
    try:
        wordcloud = cloud_builder.generate(combined_text)
    except ValueError as err:
        # Stopword filtering inside WordCloud can still leave nothing to draw.
        raise gr.Error("No words left to plot after filtering.") from err

    # Map of words to colors; anything outside both sets falls back to gray
    color_to_words = {
        'green': positive_words,
        'red': negative_words,
    }
    grouped_color_func = SimpleGroupedColorFunc(color_to_words, 'gray')
    wordcloud.recolor(color_func=grouped_color_func)

    # Reserve a temp path and close the handle before saving: reopening a
    # NamedTemporaryFile by name while it is still open fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        output_path = tmp.name
    wordcloud.to_image().save(output_path)

    # Return the path to the temporary file
    return output_path
# Gradio UI: an Excel upload and a free-text column name go in, and the
# value returned by generate_wordcloud is shown in an image component.
iface = gr.Interface(
    fn=generate_wordcloud,
    inputs=[
        gr.File(label="Upload Excel File"),
        gr.Text(label="Column Name"),
    ],
    outputs=gr.Image(label="Word Cloud"),
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()