baconnier committed on
Commit
7617875
·
verified ·
1 Parent(s): b280299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -142
app.py CHANGED
@@ -9,6 +9,8 @@ import matplotlib.pyplot as plt
9
  from sklearn.preprocessing import StandardScaler
10
  from autoviz.AutoViz_Class import AutoViz_Class
11
  import shutil
 
 
12
 
13
  class DataAnalyzer:
14
  def __init__(self):
@@ -16,40 +18,15 @@ class DataAnalyzer:
16
  self.df = None
17
  self.AV = AutoViz_Class()
18
 
19
- def generate_sweetviz_report(self, df):
20
- self.df = df
21
- report = sv.analyze(df)
22
- report_path = os.path.join(self.temp_dir, "report.html")
23
- report.show_html(report_path, open_browser=False)
24
-
25
- with open(report_path, 'r', encoding='utf-8') as f:
26
- html_content = f.read()
27
-
28
- html_with_table = f"""
29
- <table width="100%" style="border-collapse: collapse;">
30
- <tr>
31
- <td style="padding: 20px; border: 1px solid #ddd;">
32
- <div style="height: 800px; overflow: auto;">
33
- {html_content}
34
- </div>
35
- </td>
36
- </tr>
37
- </table>
38
- """
39
-
40
- os.remove(report_path)
41
- return html_with_table
42
-
43
  def generate_autoviz_report(self, df):
44
- """Generate AutoViz report and return the HTML content"""
45
- # Create a temporary directory for AutoViz output
46
- viz_temp_dir = os.path.join(self.temp_dir, "autoviz")
47
  if os.path.exists(viz_temp_dir):
48
  shutil.rmtree(viz_temp_dir)
49
  os.makedirs(viz_temp_dir)
50
 
51
  try:
52
- # Generate AutoViz report
53
  dft = self.AV.AutoViz(
54
  filename='',
55
  sep=',',
@@ -59,87 +36,64 @@ class DataAnalyzer:
59
  verbose=0,
60
  lowess=False,
61
  chart_format='html',
62
- max_rows_analyzed=150000,
63
- save_plot_dir=viz_temp_dir
 
 
64
  )
65
 
66
- # Combine all HTML files into one
67
- html_content = ""
68
- for file in sorted(os.listdir(viz_temp_dir)):
69
- if file.endswith('.html'):
70
- with open(os.path.join(viz_temp_dir, file), 'r', encoding='utf-8') as f:
71
- html_content += f.read() + "<br><hr><br>"
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- # Wrap the content in a scrollable div
74
- html_with_table = f"""
75
- <table width="100%" style="border-collapse: collapse;">
76
- <tr>
77
- <td style="padding: 20px; border: 1px solid #ddd;">
78
- <div style="height: 800px; overflow: auto;">
79
- {html_content}
80
- </div>
81
- </td>
82
- </tr>
83
- </table>
84
  """
85
 
86
- return html_with_table
87
 
88
  except Exception as e:
89
- return f"Error generating AutoViz report: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
90
  finally:
91
- # Clean up
92
  if os.path.exists(viz_temp_dir):
93
  shutil.rmtree(viz_temp_dir)
94
 
95
- def encode_and_visualize(self, column_name, encoder_type='binary'):
96
- if self.df is None or column_name not in self.df.columns:
97
- return None
98
-
99
- df_subset = self.df[[column_name]].copy()
100
-
101
- encoders = {
102
- 'binary': ce.BinaryEncoder(),
103
- 'onehot': ce.OneHotEncoder(),
104
- 'catboost': ce.CatBoostEncoder(),
105
- 'count': ce.CountEncoder()
106
- }
107
-
108
- encoder = encoders.get(encoder_type)
109
- encoded_df = encoder.fit_transform(df_subset)
110
-
111
- scaler = StandardScaler()
112
- scaled_data = scaler.fit_transform(encoded_df)
113
-
114
- reducer = umap.UMAP(
115
- n_neighbors=15,
116
- min_dist=0.1,
117
- n_components=2,
118
- random_state=42
119
- )
120
-
121
- embedding = reducer.fit_transform(scaled_data)
122
-
123
- plt.figure(figsize=(10, 6))
124
- scatter = plt.scatter(
125
- embedding[:, 0],
126
- embedding[:, 1],
127
- c=pd.factorize(df_subset[column_name])[0],
128
- cmap='viridis',
129
- alpha=0.6
130
- )
131
-
132
- plt.colorbar(scatter)
133
- plt.title(f'UMAP visualization of {column_name}\nusing {encoder_type} encoding')
134
- plt.xlabel('UMAP1')
135
- plt.ylabel('UMAP2')
136
-
137
- buf = io.BytesIO()
138
- plt.savefig(buf, format='png', bbox_inches='tight')
139
- plt.close()
140
- buf.seek(0)
141
-
142
- return buf
143
 
144
  def create_interface():
145
  analyzer = DataAnalyzer()
@@ -148,74 +102,60 @@ def create_interface():
148
  gr.Markdown("# Data Analysis Dashboard")
149
 
150
  with gr.Tabs():
151
- with gr.TabItem("Sweetviz Analysis"):
152
  file_input = gr.File(label="Upload CSV")
153
- report_html = gr.HTML()
154
 
155
  with gr.TabItem("AutoViz Analysis"):
156
- autoviz_html = gr.HTML()
157
-
158
- with gr.TabItem("Categorical Analysis"):
159
  with gr.Row():
160
- column_dropdown = gr.Dropdown(
161
- label="Select Categorical Column",
162
- choices=[],
163
- interactive=True
164
- )
165
- encoder_dropdown = gr.Dropdown(
166
- label="Select Encoder",
167
- choices=['binary', 'onehot', 'catboost', 'count'],
168
- value='binary',
169
- interactive=True
170
- )
171
- plot_output = gr.Image(label="UMAP Visualization")
172
-
173
  def process_file(file):
174
  if file is None:
175
- return None, None, gr.Dropdown(choices=[])
176
 
177
  try:
178
  df = pd.read_csv(file.name)
179
- cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
180
 
181
- # Generate both reports
 
 
 
182
  sweetviz_report = analyzer.generate_sweetviz_report(df)
183
  autoviz_report = analyzer.generate_autoviz_report(df)
184
 
 
 
 
185
  return (
 
186
  sweetviz_report,
187
  autoviz_report,
188
  gr.Dropdown(choices=cat_columns)
189
  )
190
  except Exception as e:
191
- return f"Error: {str(e)}", None, gr.Dropdown(choices=[])
192
-
193
- def update_plot(column, encoder_type):
194
- if column is None:
195
- return None
196
- try:
197
- return analyzer.encode_and_visualize(column, encoder_type)
198
- except Exception as e:
199
- return None
200
-
201
  file_input.change(
202
  fn=process_file,
203
  inputs=[file_input],
204
- outputs=[report_html, autoviz_html, column_dropdown]
205
  )
206
 
207
- column_dropdown.change(
208
- fn=update_plot,
209
- inputs=[column_dropdown, encoder_dropdown],
210
- outputs=[plot_output]
211
- )
212
-
213
- encoder_dropdown.change(
214
- fn=update_plot,
215
- inputs=[column_dropdown, encoder_dropdown],
216
- outputs=[plot_output]
217
- )
218
-
219
  return demo
220
 
221
  if __name__ == "__main__":
 
9
  from sklearn.preprocessing import StandardScaler
10
  from autoviz.AutoViz_Class import AutoViz_Class
11
  import shutil
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
 
15
  class DataAnalyzer:
16
  def __init__(self):
 
18
  self.df = None
19
  self.AV = AutoViz_Class()
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def generate_autoviz_report(self, df):
22
+ """Generate AutoViz report with proper error handling"""
23
+ viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
 
24
  if os.path.exists(viz_temp_dir):
25
  shutil.rmtree(viz_temp_dir)
26
  os.makedirs(viz_temp_dir)
27
 
28
  try:
29
+ # Configure AutoViz with safe defaults
30
  dft = self.AV.AutoViz(
31
  filename='',
32
  sep=',',
 
36
  verbose=0,
37
  lowess=False,
38
  chart_format='html',
39
+ max_rows_analyzed=5000, # Limit rows for better performance
40
+ max_cols_analyzed=30, # Limit columns
41
+ save_plot_dir=viz_temp_dir,
42
+ ignore_warnings=True
43
  )
44
 
45
+ # Collect all generated HTML files
46
+ html_parts = []
47
+ if os.path.exists(viz_temp_dir):
48
+ for file in sorted(os.listdir(viz_temp_dir)):
49
+ if file.endswith('.html'):
50
+ file_path = os.path.join(viz_temp_dir, file)
51
+ try:
52
+ with open(file_path, 'r', encoding='utf-8') as f:
53
+ content = f.read()
54
+ if content.strip(): # Only add non-empty content
55
+ html_parts.append(content)
56
+ except Exception as e:
57
+ print(f"Error reading file {file}: {str(e)}")
58
+
59
+ if not html_parts:
60
+ return "No visualizations were generated. The dataset might be too small or contain invalid data."
61
+
62
+ # Combine all HTML content
63
+ combined_html = "<br><hr><br>".join(html_parts)
64
 
65
+ # Create a container with proper styling
66
+ html_with_container = f"""
67
+ <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
68
+ <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
69
+ <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
70
+ {combined_html}
71
+ </div>
72
+ </div>
 
 
 
73
  """
74
 
75
+ return html_with_container
76
 
77
  except Exception as e:
78
+ error_message = f"""
79
+ <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
80
+ <h3>Error Generating AutoViz Report</h3>
81
+ <p>Error details: {str(e)}</p>
82
+ <p>Suggestions:</p>
83
+ <ul>
84
+ <li>Check if your dataset has valid numerical or categorical columns</li>
85
+ <li>Ensure your dataset has at least 2 columns and 10 rows</li>
86
+ <li>Remove any corrupted or invalid data</li>
87
+ </ul>
88
+ </div>
89
+ """
90
+ return error_message
91
  finally:
92
+ # Cleanup
93
  if os.path.exists(viz_temp_dir):
94
  shutil.rmtree(viz_temp_dir)
95
 
96
+ # ... (rest of the DataAnalyzer class remains the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def create_interface():
99
  analyzer = DataAnalyzer()
 
102
  gr.Markdown("# Data Analysis Dashboard")
103
 
104
  with gr.Tabs():
105
+ with gr.TabItem("Data Upload & Preview"):
106
  file_input = gr.File(label="Upload CSV")
107
+ data_preview = gr.Dataframe(label="Data Preview")
108
 
109
  with gr.TabItem("AutoViz Analysis"):
 
 
 
110
  with gr.Row():
111
+ autoviz_html = gr.HTML()
112
+ gr.Markdown("""
113
+ ### AutoViz Analysis Info
114
+ - Generates automatic visualizations
115
+ - Analyzes relationships between variables
116
+ - Creates distribution plots
117
+ - Shows correlation matrices
118
+ - Identifies patterns and outliers
119
+ """)
120
+
121
+ # ... (other tabs remain the same)
122
+
 
123
  def process_file(file):
124
  if file is None:
125
+ return None, None, None, gr.Dropdown(choices=[])
126
 
127
  try:
128
  df = pd.read_csv(file.name)
 
129
 
130
+ # Preview first few rows
131
+ preview = df.head()
132
+
133
+ # Generate reports
134
  sweetviz_report = analyzer.generate_sweetviz_report(df)
135
  autoviz_report = analyzer.generate_autoviz_report(df)
136
 
137
+ # Get categorical columns
138
+ cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
139
+
140
  return (
141
+ preview,
142
  sweetviz_report,
143
  autoviz_report,
144
  gr.Dropdown(choices=cat_columns)
145
  )
146
  except Exception as e:
147
+ error_message = f"Error processing file: {str(e)}"
148
+ return None, error_message, error_message, gr.Dropdown(choices=[])
149
+
150
+ # Update file input handler
 
 
 
 
 
 
151
  file_input.change(
152
  fn=process_file,
153
  inputs=[file_input],
154
+ outputs=[data_preview, report_html, autoviz_html, column_dropdown]
155
  )
156
 
157
+ # ... (rest of the interface remains the same)
158
+
 
 
 
 
 
 
 
 
 
 
159
  return demo
160
 
161
  if __name__ == "__main__":