Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,52 +1,82 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
from wordcloud import WordCloud, STOPWORDS
|
4 |
-
import matplotlib.pyplot as plt
|
5 |
from nltk.corpus import opinion_lexicon
|
6 |
from nltk.tokenize import word_tokenize
|
7 |
import nltk
|
8 |
-
|
9 |
-
import
|
10 |
-
import io
|
11 |
|
12 |
# Ensure NLTK data is downloaded
|
13 |
nltk.download('opinion_lexicon')
|
14 |
nltk.download('punkt')
|
15 |
|
16 |
-
#
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
buf = io.BytesIO()
|
26 |
-
plt.savefig(buf, format='png')
|
27 |
-
buf.seek(0)
|
28 |
-
image = Image.open(buf)
|
29 |
-
# Convert to numpy array for Gradio output
|
30 |
-
image_array = np.array(image)
|
31 |
-
return image_array
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
# Generate the word cloud
|
40 |
-
image = generate_word_cloud("temp_excel_file.xlsx", column_name)
|
41 |
|
42 |
-
# Return the image
|
43 |
-
return image
|
44 |
|
45 |
-
iface = gr.Interface(fn=process_excel,
|
46 |
-
inputs=[gr.File(file_count=1, label="Upload Excel File"), gr.Textbox(label="Column Name")],
|
47 |
-
outputs=gr.Image(type="numpy", label="Word Cloud"),
|
48 |
-
title="Word Cloud Generator",
|
49 |
-
description="Upload an Excel file and enter the column name to generate a word cloud of positive and negative words.")
|
50 |
|
51 |
if __name__ == "__main__":
|
52 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
from wordcloud import WordCloud, STOPWORDS
|
|
|
4 |
from nltk.corpus import opinion_lexicon
|
5 |
from nltk.tokenize import word_tokenize
|
6 |
import nltk
|
7 |
+
import tempfile
|
8 |
+
import os
|
|
|
9 |
|
10 |
# Ensure the NLTK resources used by this app are available locally.
nltk.download('opinion_lexicon')
nltk.download('punkt')
# Recent NLTK releases make word_tokenize load its models from 'punkt_tab'
# instead of 'punkt'; fetch it as well so tokenization works on both old and
# new versions.
# NOTE(review): on NLTK versions whose index lacks 'punkt_tab' this call is
# expected to just report the miss and return False -- confirm on the pinned
# version.
nltk.download('punkt_tab')

# Load the positive and negative opinion words once at import time; sets keep
# the per-token membership tests in generate_wordcloud cheap.
positive_words = set(opinion_lexicon.positive())
negative_words = set(opinion_lexicon.negative())
|
17 |
|
18 |
+
class SimpleGroupedColorFunc:
    """Callable that maps a word to a color based on predefined word groups.

    Args:
        color_to_words: Mapping of color -> iterable of words that should be
            drawn in that color.
        default_color: Color returned for any word that appears in none of
            the groups.

    Attributes are kept public (``color_to_words``, ``default_color``,
    ``words_to_color``) so callers can inspect the configuration.
    """

    def __init__(self, color_to_words, default_color):
        self.color_to_words = color_to_words
        self.default_color = default_color
        # Invert the mapping once so every lookup is a single dict access.
        # If a word is listed under several colors, the color visited last
        # while iterating ``color_to_words`` wins.
        self.words_to_color = {
            word: color
            for color, words in color_to_words.items()
            for word in words
        }

    def __call__(self, word, **kwargs):
        """Return the color for *word*.

        Extra keyword arguments are accepted and ignored so the object can be
        used wherever a color function is called with additional layout
        details.
        """
        return self.words_to_color.get(word, self.default_color)
|
27 |
|
28 |
+
def generate_wordcloud(excel_file, column_name):
    """Render a sentiment-colored word cloud from one column of an Excel file.

    The text of the chosen column is lowercased and tokenized, only tokens
    found in the positive or negative opinion lexicon are kept, and the
    resulting cloud is colored green (positive), red (negative) or gray
    (anything else).

    Args:
        excel_file: The uploaded file as supplied by the ``gr.File`` input.
            Either a path, or an object exposing the path as ``.name``; any
            other value is handed to ``pd.read_excel`` unchanged.
        column_name: Name of the column that holds the text to analyze.

    Returns:
        Path of a temporary PNG file containing the word cloud. The file is
        created with ``delete=False`` so it still exists when the caller
        reads it.

    Raises:
        gr.Error: If no file was uploaded, the column does not exist, or the
            column contains no usable positive/negative words.
    """
    if excel_file is None:
        raise gr.Error("Please upload an Excel file.")

    # Depending on the Gradio version the upload arrives as a path string or
    # as a temp-file wrapper; in the latter case read from its path.
    excel_source = getattr(excel_file, "name", excel_file)
    df = pd.read_excel(excel_source)

    # Use the user-provided column name. Raising gr.Error (instead of
    # returning the message) keeps a plain string from being sent to the
    # image output.
    if column_name not in df.columns:
        raise gr.Error("The specified column name does not exist in the Excel file.")

    text_data = ' '.join(df[column_name].dropna().astype(str))

    # Tokenize the text and keep only the sentiment-bearing tokens.
    tokens = word_tokenize(text_data.lower())
    combined_tokens = [
        word for word in tokens
        if word in positive_words or word in negative_words
    ]

    no_words_message = "No positive or negative words were found in the specified column."
    if not combined_tokens:
        raise gr.Error(no_words_message)

    combined_text = ' '.join(combined_tokens)

    try:
        wordcloud = WordCloud(
            stopwords=STOPWORDS,
            background_color='white',
            width=800,
            height=400,
        ).generate(combined_text)
    except ValueError as err:
        # Raised when nothing is left to plot, e.g. every remaining token is
        # also a stopword.
        raise gr.Error(no_words_message) from err

    # Map of words to colors; words in neither list fall back to gray.
    color_to_words = {
        'green': positive_words,
        'red': negative_words,
    }
    grouped_color_func = SimpleGroupedColorFunc(color_to_words, 'gray')
    wordcloud.recolor(color_func=grouped_color_func)

    # Reserve a temp path, then write after the handle is closed: saving to a
    # path that is still held open fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
        image_path = tmp.name
    wordcloud.to_image().save(image_path)

    return image_path
|
70 |
+
|
71 |
+
# Gradio UI: an Excel upload plus a free-text column name go in, the rendered
# word cloud image comes out.
iface = gr.Interface(
    fn=generate_wordcloud,
    inputs=[
        gr.File(label="Upload Excel File"),
        gr.Text(label="Column Name"),
    ],
    outputs=gr.Image(label="Word Cloud"),
    title="Sentiment Analysis Word Cloud Generator",
    description="Upload an Excel file and specify a column name to generate a sentiment-based word cloud.",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|