"""Gradio app that renders a sentiment-colored word cloud from an Excel column."""

import os
import tempfile

import gradio as gr
import nltk
import pandas as pd
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize
from wordcloud import STOPWORDS, WordCloud

# Ensure NLTK data is downloaded
# NOTE(review): recent NLTK releases may look for 'punkt_tab' instead of
# 'punkt' when tokenizing -- confirm against the pinned NLTK version.
nltk.download('opinion_lexicon')
nltk.download('punkt')

# Load positive and negative words
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())


class SimpleGroupedColorFunc:
    """Color function that maps words to colors by list membership.

    Args:
        color_to_words: Mapping of color name -> iterable of words that
            should be drawn in that color. If a word appears under several
            colors, the color listed last in the mapping wins.
        default_color: Color returned for words found in none of the lists.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color
        # Invert the mapping once so each lookup in __call__ is O(1).
        self.words_to_color = {
            word: color
            for color, words in color_to_words.items()
            for word in words
        }

    def __call__(self, word, **kwargs):
        """Return the color for *word*; extra keyword arguments are ignored."""
        return self.words_to_color.get(word, self.default_color)


# Built once at import time: the lexicon is static, so there is no reason to
# rebuild the word -> color table on every request.
_SENTIMENT_WORDS = positive_words | negative_words
_SENTIMENT_COLOR_FUNC = SimpleGroupedColorFunc(
    {
        'green': positive_words,
        'red': negative_words,
    },
    'gray',  # default color for words not in any predefined list
)


def generate_wordcloud(excel_file, column_name):
    """Build a sentiment word cloud from one column of an Excel file.

    Text in the column is lower-cased and tokenized; only tokens found in the
    positive or negative opinion lexicon are kept. Positive words are drawn
    in green, negative words in red.

    Args:
        excel_file: The uploaded file as delivered by ``gr.File`` -- either a
            path string or an object exposing the path as ``.name``.
        column_name: Name of the column whose text should be analysed.

    Returns:
        Path to a temporary PNG file containing the rendered word cloud.
        The file is not deleted automatically.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column contains no sentiment words to plot.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    # gr.File may hand over a plain path or a temp-file wrapper depending on
    # the Gradio version; pandas is happiest with the path in both cases.
    excel_path = getattr(excel_file, "name", excel_file)
    df = pd.read_excel(excel_path)

    # The output component is an image, so a returned message string would be
    # misinterpreted as an image path; raise a user-visible error instead.
    if column_name not in df.columns:
        raise gr.Error(
            "The specified column name does not exist in the Excel file."
        )

    text_data = ' '.join(df[column_name].dropna().astype(str))

    # Tokenize, then keep only words carrying positive/negative sentiment.
    tokens = word_tokenize(text_data.lower())
    sentiment_text = ' '.join(
        token for token in tokens if token in _SENTIMENT_WORDS
    )

    cloud = WordCloud(
        stopwords=STOPWORDS,
        background_color='white',
        width=800,
        height=400,
        # Neighbouring tokens in the filtered text were not neighbours in the
        # source, so bigram detection would only produce artefacts.
        collocations=False,
        color_func=_SENTIMENT_COLOR_FUNC,
    )
    try:
        cloud.generate(sentiment_text)
    except ValueError as err:
        # WordCloud refuses to plot when no words are left after filtering.
        raise gr.Error(
            "No positive or negative words were found in the selected column."
        ) from err

    # Write through the open handle rather than re-opening the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        cloud.to_image().save(tmp, format='PNG')

    # Return the path to the temporary file
    return tmp.name


# Define Gradio interface with an additional input for the column name
iface = gr.Interface(
    fn=generate_wordcloud,
    inputs=[
        gr.File(label="Upload Excel File"),
        gr.Text(label="Column Name"),
    ],
    outputs=gr.Image(label="Word Cloud"),
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
)

if __name__ == "__main__":
    iface.launch()