trttung1610 committed on
Commit
6be9d7e
·
1 Parent(s): 88f122c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -213
app.py DELETED
@@ -1,213 +0,0 @@
1
- from transformers import RobertaForSequenceClassification, AutoTokenizer, pipeline
2
- import torch
3
- import nltk
4
- import docx2txt
5
- import pandas as pd
6
- import os
7
- import matplotlib.pyplot as plt
8
- import openpyxl
9
- from openpyxl.styles import Font, Color, PatternFill
10
- from openpyxl.styles.colors import WHITE
11
- import gradio as gr
12
-
13
# Fetch the NLTK "punkt" data when the module is imported.
nltk.download('punkt')

# Single checkpoint identifier shared by the classifier and its tokenizer.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Sequence-classification model plus its tokenizer (use_fast=False is requested
# explicitly, i.e. the non-"fast" tokenizer implementation).
senti_model = RobertaForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
senti_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT, use_fast=False)
18
-
19
- # File read
20
def read_file(docx):
    """Return the non-empty, whitespace-stripped lines of a .docx document.

    Args:
        docx: Document reference passed straight to ``docx2txt.process``.

    Returns:
        A list of stripped text lines with blank lines removed. If the
        document cannot be read, the error is printed and an empty list is
        returned so that callers can always iterate over the result.
    """
    try:
        text = docx2txt.process(docx)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary around a
        # third-party parser. Report the problem and fall back to "no lines"
        # instead of implicitly returning None.
        print(f"Error reading file: {e}")
        return []

    stripped_lines = (line.strip() for line in text.split('\n'))
    return [line for line in stripped_lines if line]
29
-
30
-
31
- # Define a function to analyze the sentiment of a text
32
def analyze(sentence, max_length=512):
    """Score one piece of text with the module-level sentiment model.

    Args:
        sentence: Text to classify.
        max_length: Maximum number of token ids (special tokens included)
            fed to the model. Longer inputs are truncated so that they fit
            the model's position window instead of failing inside the
            forward pass.

    Returns:
        A list of softmax probabilities, one per model class. Callers in
        this file label them Negative, Neutral, Positive in that order.
    """
    token_ids = senti_tokenizer.encode(
        sentence, truncation=True, max_length=max_length
    )
    # The model expects a batch dimension, hence the extra list level.
    input_ids = torch.tensor([token_ids])
    with torch.no_grad():  # inference only, no gradients needed
        out = senti_model(input_ids)
    return out.logits.softmax(dim=-1)[0].tolist()
38
-
39
-
40
def file_analysis(docx):
    """Run sentiment analysis on every non-empty line of a document.

    Args:
        docx: Document reference forwarded to ``read_file``.

    Returns:
        A list with one ``analyze`` result per line returned by
        ``read_file``; empty when the document yields no lines.
    """
    # ``or []`` guards against a reader that yields None on failure, which
    # would otherwise raise TypeError in the iteration below.
    sentences = read_file(docx) or []
    return [analyze(sentence) for sentence in sentences]
50
-
51
-
52
def generate_pie_chart(df, output_path='pie_chart.png'):
    """Render a pie chart of the mean Negative/Neutral/Positive scores.

    Args:
        df: DataFrame with numeric 'Negative', 'Neutral' and 'Positive'
            columns.
        output_path: Where the PNG is written. Defaults to 'pie_chart.png'
            in the current working directory.

    Returns:
        The path the image was saved to.
    """
    labels = ['Negative', 'Neutral', 'Positive']
    averages = [df[label].mean() for label in labels]

    # One colour per label, in the same order as ``labels``.
    colors = ['#BDBDBD', '#87CEFA', '#9ACD32']

    # Draw on an explicit figure rather than pyplot's implicit "current"
    # one, and always close it so a failure while drawing or saving does
    # not leave a stale figure behind for the next call.
    fig, ax = plt.subplots()
    try:
        ax.pie(averages, labels=labels, colors=colors, autopct='%1.1f%%')
        ax.set_title('Average Scores by Sentiment')
        fig.savefig(output_path)
    finally:
        plt.close(fig)

    return output_path
75
-
76
-
77
def generate_excel_file(df, output_path='result.xlsx'):
    """Write the sentiment scores (and text, if present) to a styled workbook.

    The sheet has a bold header row 'Negative', 'Neutral', 'Positive', 'Text'.
    In each data row the highest score cell(s) are filled with the colour of
    the winning sentiment; the text cell, when the DataFrame has a 'Text'
    column, gets the same fill, with a white font for Negative rows and a
    black font otherwise.

    Args:
        df: DataFrame with 'Negative', 'Neutral', 'Positive' columns and an
            optional 'Text' column. Other columns are not written.
        output_path: Where the workbook is saved. Defaults to 'result.xlsx'
            in the current working directory.

    Returns:
        The path the workbook was saved to.
    """
    sentiment_cols = ['Negative', 'Neutral', 'Positive']
    headers = sentiment_cols + ['Text']

    fill_dict = {
        'Negative': PatternFill(start_color='BDBDBD', end_color='BDBDBD', fill_type='solid'),
        'Positive': PatternFill(start_color='9ACD32', end_color='9ACD32', fill_type='solid'),
        'Neutral': PatternFill(start_color='87CEFA', end_color='87CEFA', fill_type='solid'),
    }

    wb = openpyxl.Workbook()
    ws = wb.active

    for col_num, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_num, value=header)
        cell.font = Font(bold=True)

    has_text = 'Text' in df.columns
    text_col = len(headers)

    # Number sheet rows by position (starting below the header) instead of
    # trusting the DataFrame index to be 0..n-1.
    for sheet_row, (_, row_data) in enumerate(df.iterrows(), 2):
        # Read values by column name so each lands under its own header.
        scores = [row_data[col] for col in sentiment_cols]
        max_score = max(scores)
        sentiment = sentiment_cols[scores.index(max_score)]
        fill = fill_dict[sentiment]

        for col_num, score in enumerate(scores, 1):
            cell = ws.cell(row=sheet_row, column=col_num, value=score)
            if score == max_score:
                cell.fill = fill

        if has_text:
            cell = ws.cell(row=sheet_row, column=text_col, value=row_data['Text'])
            cell.fill = fill
            # Choose the font from the sentiment itself: openpyxl stores
            # colours as 8-digit aRGB, so comparing the fill's rgb against
            # the 6-digit 'BDBDBD' never matched.
            font_color = WHITE if sentiment == 'Negative' else Color('000000')
            cell.font = Font(color=font_color)

    wb.save(output_path)
    return output_path
125
-
126
-
127
def process_file(docx):
    """Analyse a document and produce its pie chart and Excel report.

    Args:
        docx: Document reference forwarded to ``read_file``.

    Returns:
        A ``(pie_chart_name, excel_file_path)`` tuple with the paths of the
        generated image and workbook.
    """
    # Parse the document once and reuse the lines for both the scores and
    # the 'Text' column, so the two can never disagree in length.
    sentences = read_file(docx) or []
    results = [analyze(sentence) for sentence in sentences]

    df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
    df['Text'] = sentences

    pie_chart_name = generate_pie_chart(df)
    excel_file_path = generate_excel_file(df)

    return pie_chart_name, excel_file_path
140
-
141
def analyze_file(file, sentence):
    """Gradio callback: analyse an uploaded file or a typed sentence.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or a
            falsy value when nothing was uploaded.
        sentence: Text entered by the user, or a falsy value when empty.

    Returns:
        An ``(excel_file_path, pie_chart_name)`` tuple. When a file is given
        it is the one analysed (a sentence supplied alongside it does not
        affect the outputs). When only a sentence is given, the report covers
        that single sentence. When neither is given, ``(None, None)`` is
        returned so the two declared outputs still receive a value each.
    """
    if file:
        pie_chart_name, excel_file_path = process_file(file.name)
        return excel_file_path, pie_chart_name

    if sentence:
        # Build the one-row frame once and include the text so the
        # workbook's 'Text' column is populated like in the file case.
        df = pd.DataFrame([analyze(sentence)], columns=['Negative', 'Neutral', 'Positive'])
        df['Text'] = [sentence]

        pie_chart_name = generate_pie_chart(df)
        excel_file_path = generate_excel_file(df)
        return excel_file_path, pie_chart_name

    return None, None
191
-
192
# Input widgets: an optional document upload and an optional free-text box.
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces - confirm the installed Gradio version before upgrading.
inputs = [
    gr.inputs.File(label="Select File for Analysis"),
    gr.inputs.Textbox(label="Enter Text"),
]

# Output widgets, in the order analyze_file returns them: workbook, chart.
outputs = [
    gr.outputs.File(label="Analysis Result Excel"),
    gr.outputs.Image(type="filepath", label="Analysis Metrics"),
]

interface = gr.Interface(
    title="Sentiment Analysis",
    fn=analyze_file,
    inputs=inputs,
    outputs=outputs,
    allow_flagging="never",  # hide the flag button
)

if __name__ == "__main__":
    interface.launch(share=True)