# Spaces: Running  (hosting-page status text captured with this file; not part of the program)
import html
import os
import shutil
import tempfile
import warnings

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler
# Install a blanket "ignore" filter so no warning of any category is shown.
warnings.simplefilter("ignore")
class DataAnalyzer:
    """Produce HTML analysis reports for a pandas DataFrame.

    Only the AutoViz report generator is visible in this part of the file;
    the remaining methods are elided (see the marker at the end of the class).
    """

    def __init__(self):
        """Create a scratch directory and the AutoViz instance used for reports."""
        # Scratch space; generate_autoviz_report() works in a sub-directory of it.
        self.temp_dir = tempfile.mkdtemp()
        # Initialised empty; not read by the code visible here.
        self.df = None
        self.AV = AutoViz_Class()

    @staticmethod
    def _read_html_files(directory):
        """Return the text of every non-empty ``.html`` file under ``directory``.

        The tree is walked recursively in sorted order so the result is
        deterministic. Files that cannot be read are reported with ``print``
        and skipped.
        """
        parts = []
        for root, dirs, files in os.walk(directory):
            dirs.sort()  # in-place sort makes os.walk descend in a stable order
            for name in sorted(files):
                if not name.endswith('.html'):
                    continue
                try:
                    with open(os.path.join(root, name), 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {name}: {str(e)}")
                    continue
                if content.strip():  # Only keep non-empty content
                    parts.append(content)
        return parts

    def generate_autoviz_report(self, df):
        """Run AutoViz on ``df`` and return its charts as one HTML fragment.

        AutoViz is asked to write HTML charts into a scratch directory under
        ``self.temp_dir``. Every non-empty ``.html`` file found there, at any
        depth, is joined into a single scrollable container. The scratch
        directory is removed again before returning.

        Args:
            df: DataFrame handed to AutoViz as ``dfte``.

        Returns:
            str: the combined report HTML; a plain-text notice when no chart
            files were produced; or an HTML error box when anything raised.
        """
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        # Start from an empty directory so stale charts never leak into a report.
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            # Configure AutoViz with safe defaults
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=5000,  # Limit rows for better performance
                max_cols_analyzed=30,  # Limit columns
                save_plot_dir=viz_temp_dir,
                ignore_warnings=True
            )

            # NOTE(review): AutoViz appears to place charts in a sub-folder of
            # save_plot_dir in at least some versions; collecting recursively
            # covers both the flat and the nested layout.
            html_parts = self._read_html_files(viz_temp_dir)

            if not html_parts:
                return "No visualizations were generated. The dataset might be too small or contain invalid data."

            # Combine all HTML content
            combined_html = "<br><hr><br>".join(html_parts)

            # Create a container with proper styling
            return f"""
            <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
            <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
            <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
            {combined_html}
            </div>
            </div>
            """
        except Exception as e:
            # UI boundary: turn any failure into a visible message. The
            # exception text is escaped because it is embedded in HTML and may
            # echo data from the uploaded file.
            return f"""
            <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
            <h3>Error Generating AutoViz Report</h3>
            <p>Error details: {html.escape(str(e))}</p>
            <p>Suggestions:</p>
            <ul>
            <li>Check if your dataset has valid numerical or categorical columns</li>
            <li>Ensure your dataset has at least 2 columns and 10 rows</li>
            <li>Remove any corrupted or invalid data</li>
            </ul>
            </div>
            """
        finally:
            # Best-effort cleanup: an error here must not replace the report
            # (or error box) that is already being returned.
            shutil.rmtree(viz_temp_dir, ignore_errors=True)

    # ... (rest of the DataAnalyzer class remains the same)
def create_interface():
    """Build the Gradio Blocks app for the data-analysis dashboard.

    Creates one ``DataAnalyzer`` shared by all callbacks, lays out the tabs
    and wires the file-upload component to ``process_file``.

    Returns:
        The ``gr.Blocks`` instance; the caller is expected to ``launch()`` it.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        with gr.Tabs():
            with gr.TabItem("Data Upload & Preview"):
                file_input = gr.File(label="Upload CSV")
                data_preview = gr.Dataframe(label="Data Preview")

            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    autoviz_html = gr.HTML()
                gr.Markdown(
                    "\n"
                    "### AutoViz Analysis Info\n"
                    "- Generates automatic visualizations\n"
                    "- Analyzes relationships between variables\n"
                    "- Creates distribution plots\n"
                    "- Shows correlation matrices\n"
                    "- Identifies patterns and outliers\n"
                )

            # ... (other tabs remain the same)

        def process_file(file):
            """Load the uploaded CSV and build every output for the UI.

            Args:
                file: value delivered by ``gr.File``; ``None`` when the upload
                    is cleared.

            Returns:
                A 4-tuple ``(preview, sweetviz_report, autoviz_report,
                dropdown)`` matching the ``outputs`` list of the change
                handler. On failure the two report slots carry the error text.
            """
            if file is None:
                return None, None, None, gr.Dropdown(choices=[])

            # Accept both an object exposing ``.name`` and a plain path string,
            # since what gr.File passes depends on its ``type`` setting.
            csv_path = getattr(file, "name", file)

            try:
                df = pd.read_csv(csv_path)

                # Preview first few rows
                preview = df.head()

                # Generate reports
                sweetviz_report = analyzer.generate_sweetviz_report(df)
                autoviz_report = analyzer.generate_autoviz_report(df)

                # Get categorical columns
                cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()

                return (
                    preview,
                    sweetviz_report,
                    autoviz_report,
                    gr.Dropdown(choices=cat_columns)
                )
            except Exception as e:
                # UI boundary: show the failure instead of crashing the
                # callback. Escaped because the text is rendered by HTML
                # components and may echo content of the uploaded file.
                error_message = f"Error processing file: {html.escape(str(e))}"
                return None, error_message, error_message, gr.Dropdown(choices=[])

        # Update file input handler
        # NOTE(review): report_html and column_dropdown are not created in the
        # code shown here; presumably they come from the elided tabs - confirm.
        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=[data_preview, report_html, autoviz_html, column_dropdown]
        )

        # ... (rest of the interface remains the same)

    return demo
if __name__ == "__main__":
    # Script entry point: assemble the dashboard, then serve it with
    # show_error=True passed through to launch().
    demo = create_interface()
    demo.launch(show_error=True)