# Hugging Face Space (status: Running)
import html
import os
import shutil
import tempfile
import warnings

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')
class DataAnalyzer:
    """Render Sweetviz and AutoViz reports for a DataFrame as HTML strings.

    Each report is written to files inside a private scratch directory
    (``self.temp_dir``), read back into memory, and the files are removed
    again before the method returns.
    """

    # Returned by both report methods when no dataset is available.
    _NO_DATA_MESSAGE = "Please upload a dataset first"

    # Upper bound on the number of rows handed to AutoViz.
    _MAX_ROWS = 5000

    def __init__(self):
        """Create the scratch directory and the AutoViz driver."""
        self.temp_dir = tempfile.mkdtemp()
        self.df = None  # kept for compatibility; no method here reads it
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Return a Sweetviz report for *df* wrapped in a scrollable table.

        Args:
            df: The DataFrame to analyse, or ``None``.

        Returns:
            An HTML string, or a plain prompt message when *df* is ``None``.

        Raises:
            Whatever ``sweetviz`` or the file I/O raises; errors are not
            converted to HTML here.
        """
        if df is None:
            return self._NO_DATA_MESSAGE

        # A unique file per call: this object is shared between requests, so
        # a fixed "report.html" could be overwritten or removed mid-read.
        os.makedirs(self.temp_dir, exist_ok=True)
        fd, report_path = tempfile.mkstemp(suffix=".html", dir=self.temp_dir)
        os.close(fd)
        try:
            report = sv.analyze(df)
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the file even when analysis or reading failed.
            if os.path.exists(report_path):
                os.remove(report_path)

        # NOTE(review): the report is a complete HTML document embedded in a
        # <div>; confirm its scripts still run in the component showing it.
        return f"""
        <table width="100%" style="border-collapse: collapse;">
        <tr>
        <td style="padding: 20px; border: 1px solid #ddd;">
        <div style="height: 800px; overflow: auto;">
        {html_content}
        </div>
        </td>
        </tr>
        </table>
        """

    def generate_autoviz_report(self, df):
        """Return the AutoViz charts for *df* combined into one HTML string.

        Frames longer than ``_MAX_ROWS`` are sampled down (fixed seed) and
        object columns that hold only numeric text are converted to numbers
        before plotting.

        Args:
            df: The DataFrame to visualise, or ``None``.

        Returns:
            An HTML string: the combined charts, a notice when AutoViz
            produced no HTML files, or an error panel when anything in the
            pipeline raised. A plain prompt message when *df* is ``None``.
        """
        if df is None:
            return self._NO_DATA_MESSAGE

        # A fresh output directory per call keeps overlapping requests from
        # deleting or mixing up each other's charts.
        os.makedirs(self.temp_dir, exist_ok=True)
        viz_dir = tempfile.mkdtemp(prefix="autoviz_", dir=self.temp_dir)
        try:
            if len(df) > self._MAX_ROWS:
                df = df.sample(n=self._MAX_ROWS, random_state=42)
            # Work on a copy so the caller's frame is never modified.
            df = self._coerce_numeric_strings(df.copy())

            plt.close('all')  # drop figures left over from earlier runs

            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=1,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=self._MAX_ROWS,
                max_cols_analyzed=30,
                save_plot_dir=viz_dir,
                ignore_warnings=True,
                sampling=True,
                sample_size=self._MAX_ROWS,
            )

            html_parts = self._read_html_files(viz_dir)
            if not html_parts:
                return """
                <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
                <h3>No visualizations were generated</h3>
                <p>This might be due to:</p>
                <ul>
                <li>Data format issues</li>
                <li>Too few unique values in columns</li>
                <li>All categorical data with high cardinality</li>
                </ul>
                <p>Try with a different dataset or check your data formatting.</p>
                </div>
                """

            charts = '<hr>'.join(html_parts)
            return f"""
            <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
            <h2 style="text-align: center;">AutoViz Analysis Report</h2>
            <p style="text-align: center;">Analysis of {len(df)} rows and {len(df.columns)} columns</p>
            <hr>
            {charts}
            </div>
            """
        except Exception as e:
            # Boundary handler: report the failure as HTML instead of raising.
            # The message can echo user-supplied text (e.g. column names), so
            # it is escaped before being placed into markup.
            return f"""
            <div style="padding: 20px; border: 1px solid red; border-radius: 5px;">
            <h3>Error in AutoViz Analysis</h3>
            <p>Error details: {html.escape(str(e))}</p>
            <p>Troubleshooting steps:</p>
            <ul>
            <li>Check if your data contains valid numerical or categorical values</li>
            <li>Ensure there are no completely empty columns</li>
            <li>Try with a smaller dataset</li>
            <li>Check for any special characters in column names</li>
            </ul>
            </div>
            """
        finally:
            shutil.rmtree(viz_dir, ignore_errors=True)

    @staticmethod
    def _coerce_numeric_strings(df):
        """Convert object columns that parse fully as numbers, in place."""
        for col in df.columns:
            if df[col].dtype == 'object':
                try:
                    df[col] = pd.to_numeric(df[col])
                except (ValueError, TypeError):
                    # Not purely numeric text: keep the column unchanged.
                    pass
        return df

    @staticmethod
    def _read_html_files(directory):
        """Return the non-blank contents of every ``.html`` file under *directory*."""
        # Walk recursively: presumably AutoViz may place its charts in a
        # subfolder of save_plot_dir rather than directly inside it.
        paths = sorted(
            os.path.join(root, name)
            for root, _dirs, names in os.walk(directory)
            for name in names
            if name.endswith('.html')
        )
        parts = []
        for path in paths:
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f"Error reading file {os.path.basename(path)}: {str(e)}")
                continue
            if content.strip():
                parts.append(content)
        return parts
def create_interface():
    """Build the three-tab Gradio dashboard and return it without launching.

    Tabs: CSV upload with a preview of the first rows, a Sweetviz report,
    and an AutoViz report. The uploaded DataFrame is held in a ``gr.State``
    so both report tabs work on the same data.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    analyzer = DataAnalyzer()

    def load_data(file):
        """Read the uploaded CSV; return ``(preview_rows, full_dataframe)``."""
        if file is None:
            return None, None
        # Accept an uploaded-file object exposing ``.name`` as well as a plain
        # path string (which of the two arrives depends on the File component).
        path = getattr(file, "name", file)
        try:
            df = pd.read_csv(path)
        except Exception as e:
            # Still reset the outputs, but leave a trace instead of failing
            # silently so a bad upload can be diagnosed from the logs.
            print(f"Error loading CSV {path}: {str(e)}")
            return None, None
        return df.head(), df

    def generate_sweetviz(df):
        """Render the Sweetviz report (the analyzer handles a missing df)."""
        return analyzer.generate_sweetviz_report(df)

    def generate_autoviz(df):
        """Render the AutoViz report (the analyzer handles a missing df)."""
        return analyzer.generate_autoviz_report(df)

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        # Holds the DataFrame loaded in the first tab for the report tabs.
        current_df = gr.State(None)

        with gr.Tabs():
            with gr.TabItem("Data Upload & Preview"):
                with gr.Row():
                    file_input = gr.File(label="Upload CSV")
                    data_preview = gr.Dataframe(label="Data Preview", interactive=False)
                file_input.change(
                    fn=load_data,
                    inputs=[file_input],
                    outputs=[data_preview, current_df],
                )

            with gr.TabItem("Sweetviz Analysis"):
                with gr.Row():
                    sweetviz_button = gr.Button("Generate Sweetviz Report")
                sweetviz_output = gr.HTML(label="Sweetviz Report")
                sweetviz_button.click(
                    fn=generate_sweetviz,
                    inputs=[current_df],
                    outputs=[sweetviz_output],
                )

            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    autoviz_button = gr.Button("Generate AutoViz Report")
                autoviz_output = gr.HTML(label="AutoViz Report")
                autoviz_button.click(
                    fn=generate_autoviz,
                    inputs=[current_df],
                    outputs=[autoviz_output],
                )

    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard, then serve it.
    # show_error=True asks Gradio to surface callback errors in the UI.
    demo = create_interface()
    demo.launch(show_error=True)