import gradio as gr
import pandas as pd
import sweetviz as sv
import tempfile
import os
import category_encoders as ce
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autoviz.AutoViz_Class import AutoViz_Class
import shutil
import warnings
# Install a blanket "ignore" warning filter when this module is imported.
# No category, module, or message restriction is passed, so the filter
# matches every warning category, not just those from a specific library.
warnings.filterwarnings('ignore')
class DataAnalyzer:
def __init__(self):
    """Prepare the per-instance working state.

    Creates a fresh temporary directory, clears the DataFrame slot, and
    instantiates the AutoViz engine. ``generate_autoviz_report`` builds its
    output folder underneath ``temp_dir`` and drives ``AV`` to render charts.
    """
    # mkdtemp() creates the directory on disk and returns its path; it is
    # not removed automatically, and no cleanup is visible in this block.
    scratch_dir = tempfile.mkdtemp()
    self.temp_dir = scratch_dir

    # Starts out empty; presumably filled with the user's DataFrame by code
    # outside this block -- confirm against the rest of the class.
    self.df = None

    # One AutoViz instance is kept on the analyzer rather than built per call.
    self.AV = AutoViz_Class()
def generate_autoviz_report(self, df):
"""Generate AutoViz report with proper error handling"""
viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
if os.path.exists(viz_temp_dir):
shutil.rmtree(viz_temp_dir)
os.makedirs(viz_temp_dir)
try:
# Configure AutoViz with safe defaults
dft = self.AV.AutoViz(
filename='',
sep=',',
depVar='',
dfte=df,
header=0,
verbose=0,
lowess=False,
chart_format='html',
max_rows_analyzed=5000, # Limit rows for better performance
max_cols_analyzed=30, # Limit columns
save_plot_dir=viz_temp_dir,
ignore_warnings=True
)
# Collect all generated HTML files
html_parts = []
if os.path.exists(viz_temp_dir):
for file in sorted(os.listdir(viz_temp_dir)):
if file.endswith('.html'):
file_path = os.path.join(viz_temp_dir, file)
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
if content.strip(): # Only add non-empty content
html_parts.append(content)
except Exception as e:
print(f"Error reading file {file}: {str(e)}")
if not html_parts:
return "No visualizations were generated. The dataset might be too small or contain invalid data."
# Combine all HTML content
combined_html = "
Error details: {str(e)}
Suggestions: