baconnier committed on
Commit
179691f
·
verified ·
1 Parent(s): cb5dc7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -12
app.py CHANGED
@@ -43,7 +43,7 @@ class DataAnalyzer:
43
 
44
  os.remove(report_path)
45
  return html_with_table
46
-
47
  def generate_autoviz_report(self, df):
48
  if df is None:
49
  return "Please upload a dataset first"
@@ -52,23 +52,43 @@ class DataAnalyzer:
52
  if os.path.exists(viz_temp_dir):
53
  shutil.rmtree(viz_temp_dir)
54
  os.makedirs(viz_temp_dir)
55
-
56
  try:
57
- plt.close('all')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  dfte = self.AV.AutoViz(
59
  filename='',
60
  sep=',',
61
  depVar='',
62
  dfte=df,
63
  header=0,
64
- verbose=0,
65
  lowess=False,
66
  chart_format='html',
67
  max_rows_analyzed=5000,
68
  max_cols_analyzed=30,
69
- save_plot_dir=viz_temp_dir
 
 
 
70
  )
71
-
 
72
  html_parts = []
73
  if os.path.exists(viz_temp_dir):
74
  for file in sorted(os.listdir(viz_temp_dir)):
@@ -81,15 +101,48 @@ class DataAnalyzer:
81
  html_parts.append(content)
82
  except Exception as e:
83
  print(f"Error reading file {file}: {str(e)}")
84
-
85
  if not html_parts:
86
- return "No visualizations were generated. Please check your data."
87
-
88
- combined_html = "<br><hr><br>".join(html_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return combined_html
90
-
91
  except Exception as e:
92
- return f"Error in AutoViz: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  finally:
94
  if os.path.exists(viz_temp_dir):
95
  shutil.rmtree(viz_temp_dir)
 
43
 
44
  os.remove(report_path)
45
  return html_with_table
46
+
47
  def generate_autoviz_report(self, df):
48
  if df is None:
49
  return "Please upload a dataset first"
 
52
  if os.path.exists(viz_temp_dir):
53
  shutil.rmtree(viz_temp_dir)
54
  os.makedirs(viz_temp_dir)
55
+
56
  try:
57
+ # Sample data if it's too large
58
+ if len(df) > 5000:
59
+ df = df.sample(n=5000, random_state=42)
60
+
61
+ # Ensure all columns are properly formatted
62
+ df = df.copy()
63
+ # Convert numeric columns that might be stored as strings
64
+ for col in df.columns:
65
+ if df[col].dtype == 'object':
66
+ try:
67
+ df[col] = pd.to_numeric(df[col], errors='ignore')
68
+ except:
69
+ pass
70
+
71
+ plt.close('all') # Close any existing plots
72
+
73
+ # Configure AutoViz with more specific parameters
74
  dfte = self.AV.AutoViz(
75
  filename='',
76
  sep=',',
77
  depVar='',
78
  dfte=df,
79
  header=0,
80
+ verbose=1, # Set to 1 to see progress
81
  lowess=False,
82
  chart_format='html',
83
  max_rows_analyzed=5000,
84
  max_cols_analyzed=30,
85
+ save_plot_dir=viz_temp_dir,
86
+ ignore_warnings=True,
87
+ sampling=True, # Enable sampling
88
+ sample_size=5000
89
  )
90
+
91
+ # Collect and combine HTML files
92
  html_parts = []
93
  if os.path.exists(viz_temp_dir):
94
  for file in sorted(os.listdir(viz_temp_dir)):
 
101
  html_parts.append(content)
102
  except Exception as e:
103
  print(f"Error reading file {file}: {str(e)}")
104
+
105
  if not html_parts:
106
+ return """
107
+ <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
108
+ <h3>No visualizations were generated</h3>
109
+ <p>This might be due to:</p>
110
+ <ul>
111
+ <li>Data format issues</li>
112
+ <li>Too few unique values in columns</li>
113
+ <li>All categorical data with high cardinality</li>
114
+ </ul>
115
+ <p>Try with a different dataset or check your data formatting.</p>
116
+ </div>
117
+ """
118
+
119
+ # Combine all HTML content with proper styling
120
+ combined_html = f"""
121
+ <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
122
+ <h2 style="text-align: center;">AutoViz Analysis Report</h2>
123
+ <p style="text-align: center;">Analysis of {len(df)} rows and {len(df.columns)} columns</p>
124
+ <hr>
125
+ {'<hr>'.join(html_parts)}
126
+ </div>
127
+ """
128
+
129
  return combined_html
130
+
131
  except Exception as e:
132
+ error_message = f"""
133
+ <div style="padding: 20px; border: 1px solid red; border-radius: 5px;">
134
+ <h3>Error in AutoViz Analysis</h3>
135
+ <p>Error details: {str(e)}</p>
136
+ <p>Troubleshooting steps:</p>
137
+ <ul>
138
+ <li>Check if your data contains valid numerical or categorical values</li>
139
+ <li>Ensure there are no completely empty columns</li>
140
+ <li>Try with a smaller dataset</li>
141
+ <li>Check for any special characters in column names</li>
142
+ </ul>
143
+ </div>
144
+ """
145
+ return error_message
146
  finally:
147
  if os.path.exists(viz_temp_dir):
148
  shutil.rmtree(viz_temp_dir)