Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -43,7 +43,7 @@ class DataAnalyzer:
|
|
43 |
|
44 |
os.remove(report_path)
|
45 |
return html_with_table
|
46 |
-
|
47 |
def generate_autoviz_report(self, df):
|
48 |
if df is None:
|
49 |
return "Please upload a dataset first"
|
@@ -52,23 +52,43 @@ class DataAnalyzer:
|
|
52 |
if os.path.exists(viz_temp_dir):
|
53 |
shutil.rmtree(viz_temp_dir)
|
54 |
os.makedirs(viz_temp_dir)
|
55 |
-
|
56 |
try:
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
dfte = self.AV.AutoViz(
|
59 |
filename='',
|
60 |
sep=',',
|
61 |
depVar='',
|
62 |
dfte=df,
|
63 |
header=0,
|
64 |
-
verbose=
|
65 |
lowess=False,
|
66 |
chart_format='html',
|
67 |
max_rows_analyzed=5000,
|
68 |
max_cols_analyzed=30,
|
69 |
-
save_plot_dir=viz_temp_dir
|
|
|
|
|
|
|
70 |
)
|
71 |
-
|
|
|
72 |
html_parts = []
|
73 |
if os.path.exists(viz_temp_dir):
|
74 |
for file in sorted(os.listdir(viz_temp_dir)):
|
@@ -81,15 +101,48 @@ class DataAnalyzer:
|
|
81 |
html_parts.append(content)
|
82 |
except Exception as e:
|
83 |
print(f"Error reading file {file}: {str(e)}")
|
84 |
-
|
85 |
if not html_parts:
|
86 |
-
return "
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
return combined_html
|
90 |
-
|
91 |
except Exception as e:
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
finally:
|
94 |
if os.path.exists(viz_temp_dir):
|
95 |
shutil.rmtree(viz_temp_dir)
|
|
|
43 |
|
44 |
os.remove(report_path)
|
45 |
return html_with_table
|
46 |
+
|
47 |
def generate_autoviz_report(self, df):
|
48 |
if df is None:
|
49 |
return "Please upload a dataset first"
|
|
|
52 |
if os.path.exists(viz_temp_dir):
|
53 |
shutil.rmtree(viz_temp_dir)
|
54 |
os.makedirs(viz_temp_dir)
|
55 |
+
|
56 |
try:
|
57 |
+
# Sample data if it's too large
|
58 |
+
if len(df) > 5000:
|
59 |
+
df = df.sample(n=5000, random_state=42)
|
60 |
+
|
61 |
+
# Ensure all columns are properly formatted
|
62 |
+
df = df.copy()
|
63 |
+
# Convert numeric columns that might be stored as strings
|
64 |
+
for col in df.columns:
|
65 |
+
if df[col].dtype == 'object':
|
66 |
+
try:
|
67 |
+
df[col] = pd.to_numeric(df[col], errors='ignore')
|
68 |
+
except:
|
69 |
+
pass
|
70 |
+
|
71 |
+
plt.close('all') # Close any existing plots
|
72 |
+
|
73 |
+
# Configure AutoViz with more specific parameters
|
74 |
dfte = self.AV.AutoViz(
|
75 |
filename='',
|
76 |
sep=',',
|
77 |
depVar='',
|
78 |
dfte=df,
|
79 |
header=0,
|
80 |
+
verbose=1, # Set to 1 to see progress
|
81 |
lowess=False,
|
82 |
chart_format='html',
|
83 |
max_rows_analyzed=5000,
|
84 |
max_cols_analyzed=30,
|
85 |
+
save_plot_dir=viz_temp_dir,
|
86 |
+
ignore_warnings=True,
|
87 |
+
sampling=True, # Enable sampling
|
88 |
+
sample_size=5000
|
89 |
)
|
90 |
+
|
91 |
+
# Collect and combine HTML files
|
92 |
html_parts = []
|
93 |
if os.path.exists(viz_temp_dir):
|
94 |
for file in sorted(os.listdir(viz_temp_dir)):
|
|
|
101 |
html_parts.append(content)
|
102 |
except Exception as e:
|
103 |
print(f"Error reading file {file}: {str(e)}")
|
104 |
+
|
105 |
if not html_parts:
|
106 |
+
return """
|
107 |
+
<div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
|
108 |
+
<h3>No visualizations were generated</h3>
|
109 |
+
<p>This might be due to:</p>
|
110 |
+
<ul>
|
111 |
+
<li>Data format issues</li>
|
112 |
+
<li>Too few unique values in columns</li>
|
113 |
+
<li>All categorical data with high cardinality</li>
|
114 |
+
</ul>
|
115 |
+
<p>Try with a different dataset or check your data formatting.</p>
|
116 |
+
</div>
|
117 |
+
"""
|
118 |
+
|
119 |
+
# Combine all HTML content with proper styling
|
120 |
+
combined_html = f"""
|
121 |
+
<div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
|
122 |
+
<h2 style="text-align: center;">AutoViz Analysis Report</h2>
|
123 |
+
<p style="text-align: center;">Analysis of {len(df)} rows and {len(df.columns)} columns</p>
|
124 |
+
<hr>
|
125 |
+
{'<hr>'.join(html_parts)}
|
126 |
+
</div>
|
127 |
+
"""
|
128 |
+
|
129 |
return combined_html
|
130 |
+
|
131 |
except Exception as e:
|
132 |
+
error_message = f"""
|
133 |
+
<div style="padding: 20px; border: 1px solid red; border-radius: 5px;">
|
134 |
+
<h3>Error in AutoViz Analysis</h3>
|
135 |
+
<p>Error details: {str(e)}</p>
|
136 |
+
<p>Troubleshooting steps:</p>
|
137 |
+
<ul>
|
138 |
+
<li>Check if your data contains valid numerical or categorical values</li>
|
139 |
+
<li>Ensure there are no completely empty columns</li>
|
140 |
+
<li>Try with a smaller dataset</li>
|
141 |
+
<li>Check for any special characters in column names</li>
|
142 |
+
</ul>
|
143 |
+
</div>
|
144 |
+
"""
|
145 |
+
return error_message
|
146 |
finally:
|
147 |
if os.path.exists(viz_temp_dir):
|
148 |
shutil.rmtree(viz_temp_dir)
|