baconnier committed on
Commit
8d8e69e
·
verified ·
1 Parent(s): 7617875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -63
app.py CHANGED
@@ -18,31 +18,55 @@ class DataAnalyzer:
18
  self.df = None
19
  self.AV = AutoViz_Class()
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def generate_autoviz_report(self, df):
22
- """Generate AutoViz report with proper error handling"""
23
  viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
24
  if os.path.exists(viz_temp_dir):
25
  shutil.rmtree(viz_temp_dir)
26
  os.makedirs(viz_temp_dir)
27
 
28
  try:
29
- # Configure AutoViz with safe defaults
30
- dft = self.AV.AutoViz(
 
31
  filename='',
32
  sep=',',
33
  depVar='',
34
- dfte=df,
35
  header=0,
36
  verbose=0,
37
  lowess=False,
38
  chart_format='html',
39
- max_rows_analyzed=5000, # Limit rows for better performance
40
- max_cols_analyzed=30, # Limit columns
41
- save_plot_dir=viz_temp_dir,
42
- ignore_warnings=True
43
  )
44
 
45
- # Collect all generated HTML files
46
  html_parts = []
47
  if os.path.exists(viz_temp_dir):
48
  for file in sorted(os.listdir(viz_temp_dir)):
@@ -51,74 +75,57 @@ class DataAnalyzer:
51
  try:
52
  with open(file_path, 'r', encoding='utf-8') as f:
53
  content = f.read()
54
- if content.strip(): # Only add non-empty content
55
  html_parts.append(content)
56
  except Exception as e:
57
  print(f"Error reading file {file}: {str(e)}")
58
 
59
  if not html_parts:
60
- return "No visualizations were generated. The dataset might be too small or contain invalid data."
61
 
62
- # Combine all HTML content
63
  combined_html = "<br><hr><br>".join(html_parts)
64
-
65
- # Create a container with proper styling
66
- html_with_container = f"""
67
- <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
68
- <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
69
- <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
70
- {combined_html}
71
- </div>
72
- </div>
73
- """
74
-
75
- return html_with_container
76
 
77
  except Exception as e:
78
- error_message = f"""
79
- <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
80
- <h3>Error Generating AutoViz Report</h3>
81
- <p>Error details: {str(e)}</p>
82
- <p>Suggestions:</p>
83
- <ul>
84
- <li>Check if your dataset has valid numerical or categorical columns</li>
85
- <li>Ensure your dataset has at least 2 columns and 10 rows</li>
86
- <li>Remove any corrupted or invalid data</li>
87
- </ul>
88
- </div>
89
- """
90
- return error_message
91
  finally:
92
- # Cleanup
93
  if os.path.exists(viz_temp_dir):
94
  shutil.rmtree(viz_temp_dir)
95
 
96
- # ... (rest of the DataAnalyzer class remains the same)
97
-
98
  def create_interface():
99
  analyzer = DataAnalyzer()
100
 
101
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
102
  gr.Markdown("# Data Analysis Dashboard")
103
 
 
 
 
 
 
 
 
 
 
 
104
  with gr.Tabs():
105
  with gr.TabItem("Data Upload & Preview"):
106
  file_input = gr.File(label="Upload CSV")
107
- data_preview = gr.Dataframe(label="Data Preview")
108
 
109
  with gr.TabItem("AutoViz Analysis"):
110
- with gr.Row():
111
- autoviz_html = gr.HTML()
112
- gr.Markdown("""
113
- ### AutoViz Analysis Info
114
- - Generates automatic visualizations
115
- - Analyzes relationships between variables
116
- - Creates distribution plots
117
- - Shows correlation matrices
118
- - Identifies patterns and outliers
119
- """)
120
 
121
- # ... (other tabs remain the same)
 
 
 
 
 
 
 
122
 
123
  def process_file(file):
124
  if file is None:
@@ -126,15 +133,9 @@ def create_interface():
126
 
127
  try:
128
  df = pd.read_csv(file.name)
129
-
130
- # Preview first few rows
131
  preview = df.head()
132
-
133
- # Generate reports
134
  sweetviz_report = analyzer.generate_sweetviz_report(df)
135
  autoviz_report = analyzer.generate_autoviz_report(df)
136
-
137
- # Get categorical columns
138
  cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
139
 
140
  return (
@@ -144,17 +145,13 @@ def create_interface():
144
  gr.Dropdown(choices=cat_columns)
145
  )
146
  except Exception as e:
147
- error_message = f"Error processing file: {str(e)}"
148
- return None, error_message, error_message, gr.Dropdown(choices=[])
149
 
150
- # Update file input handler
151
  file_input.change(
152
  fn=process_file,
153
  inputs=[file_input],
154
  outputs=[data_preview, report_html, autoviz_html, column_dropdown]
155
  )
156
-
157
- # ... (rest of the interface remains the same)
158
 
159
  return demo
160
 
 
18
  self.df = None
19
  self.AV = AutoViz_Class()
20
 
21
+ def generate_sweetviz_report(self, df):
22
+ self.df = df
23
+ report = sv.analyze(df)
24
+ report_path = os.path.join(self.temp_dir, "report.html")
25
+ report.show_html(report_path, open_browser=False)
26
+
27
+ with open(report_path, 'r', encoding='utf-8') as f:
28
+ html_content = f.read()
29
+
30
+ html_with_table = f"""
31
+ <table width="100%" style="border-collapse: collapse;">
32
+ <tr>
33
+ <td style="padding: 20px; border: 1px solid #ddd;">
34
+ <div style="height: 800px; overflow: auto;">
35
+ {html_content}
36
+ </div>
37
+ </td>
38
+ </tr>
39
+ </table>
40
+ """
41
+
42
+ os.remove(report_path)
43
+ return html_with_table
44
+
45
  def generate_autoviz_report(self, df):
46
+ """Generate AutoViz report with proper sequence"""
47
  viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
48
  if os.path.exists(viz_temp_dir):
49
  shutil.rmtree(viz_temp_dir)
50
  os.makedirs(viz_temp_dir)
51
 
52
  try:
53
+ # Proper AutoViz sequence
54
+ plt.close('all') # Close any existing plots
55
+ dfte = self.AV.AutoViz(
56
  filename='',
57
  sep=',',
58
  depVar='',
59
+ dfte=df, # Pass DataFrame directly
60
  header=0,
61
  verbose=0,
62
  lowess=False,
63
  chart_format='html',
64
+ max_rows_analyzed=5000,
65
+ max_cols_analyzed=30,
66
+ save_plot_dir=viz_temp_dir
 
67
  )
68
 
69
+ # Collect generated HTML files
70
  html_parts = []
71
  if os.path.exists(viz_temp_dir):
72
  for file in sorted(os.listdir(viz_temp_dir)):
 
75
  try:
76
  with open(file_path, 'r', encoding='utf-8') as f:
77
  content = f.read()
78
+ if content.strip():
79
  html_parts.append(content)
80
  except Exception as e:
81
  print(f"Error reading file {file}: {str(e)}")
82
 
83
  if not html_parts:
84
+ return "No visualizations were generated. Please check your data."
85
 
 
86
  combined_html = "<br><hr><br>".join(html_parts)
87
+ return combined_html
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  except Exception as e:
90
+ return f"Error in AutoViz: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
91
  finally:
 
92
  if os.path.exists(viz_temp_dir):
93
  shutil.rmtree(viz_temp_dir)
94
 
 
 
95
  def create_interface():
96
  analyzer = DataAnalyzer()
97
 
98
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
99
  gr.Markdown("# Data Analysis Dashboard")
100
 
101
+ # Define all outputs first
102
+ data_preview = gr.Dataframe(label="Data Preview")
103
+ report_html = gr.HTML(label="Sweetviz Report")
104
+ autoviz_html = gr.HTML(label="AutoViz Report")
105
+ column_dropdown = gr.Dropdown(
106
+ label="Select Categorical Column",
107
+ choices=[],
108
+ interactive=True
109
+ )
110
+
111
  with gr.Tabs():
112
  with gr.TabItem("Data Upload & Preview"):
113
  file_input = gr.File(label="Upload CSV")
 
114
 
115
  with gr.TabItem("AutoViz Analysis"):
116
+ gr.Markdown("""
117
+ ### AutoViz Analysis
118
+ Automatic visualization of your dataset
119
+ """)
 
 
 
 
 
 
120
 
121
+ with gr.TabItem("Categorical Analysis"):
122
+ encoder_dropdown = gr.Dropdown(
123
+ label="Select Encoder",
124
+ choices=['binary', 'onehot', 'catboost', 'count'],
125
+ value='binary',
126
+ interactive=True
127
+ )
128
+ plot_output = gr.Image(label="UMAP Visualization")
129
 
130
  def process_file(file):
131
  if file is None:
 
133
 
134
  try:
135
  df = pd.read_csv(file.name)
 
 
136
  preview = df.head()
 
 
137
  sweetviz_report = analyzer.generate_sweetviz_report(df)
138
  autoviz_report = analyzer.generate_autoviz_report(df)
 
 
139
  cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
140
 
141
  return (
 
145
  gr.Dropdown(choices=cat_columns)
146
  )
147
  except Exception as e:
148
+ return None, f"Error: {str(e)}", f"Error: {str(e)}", gr.Dropdown(choices=[])
 
149
 
 
150
  file_input.change(
151
  fn=process_file,
152
  inputs=[file_input],
153
  outputs=[data_preview, report_html, autoviz_html, column_dropdown]
154
  )
 
 
155
 
156
  return demo
157