"""Gradio dashboard that profiles an uploaded CSV with Sweetviz and AutoViz."""

import atexit
import html
import os
import shutil
import tempfile
import warnings

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

# Limits handed to AutoViz; frames with more rows are down-sampled first.
_MAX_ROWS_ANALYZED = 5000
_MAX_COLS_ANALYZED = 30

_UPLOAD_FIRST_MESSAGE = "Please upload a dataset first"

# NOTE(review): the message templates in this file look like HTML whose tags
# were lost at some point; the remaining text is kept verbatim.
_NO_VISUALIZATIONS_HTML = (
    "\n\nNo visualizations were generated\n\n"
    "This might be due to:\n\n"
    "Try with a different dataset or check your data formatting.\n\n"
)


def _coerce_numeric_columns(df):
    """Return a copy of *df* with fully numeric ``object`` columns converted.

    Columns that cannot be converted in their entirety are left untouched.
    """
    df = df.copy()
    for col in df.columns:
        if df[col].dtype == 'object':
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                # At least one value is not numeric: keep the column as-is.
                pass
    return df


def _read_html_files(root_dir):
    """Return the non-blank contents of every ``.html`` file under *root_dir*.

    Files are read in sorted path order. A file that cannot be read is
    reported on stdout and skipped. A missing *root_dir* yields an empty list.
    """
    # Search recursively in case the plotting library nests its output in a
    # subfolder of the directory it was given -- TODO confirm for AutoViz.
    paths = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        paths.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.html')
        )

    parts = []
    for path in sorted(paths):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Error reading file {os.path.basename(path)}: {str(e)}")
            continue
        if content.strip():
            parts.append(content)
    return parts


class DataAnalyzer:
    """Build Sweetviz and AutoViz reports for a DataFrame as HTML strings."""

    def __init__(self):
        # Scratch directory for report files; deleted when the process exits.
        self.temp_dir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        # Not read by any method visible in this file.
        self.df = None
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Return the Sweetviz report for *df* as a string.

        Returns a plain prompt message when *df* is ``None``. Errors raised
        by Sweetviz or by reading the report propagate to the caller.
        """
        if df is None:
            return _UPLOAD_FIRST_MESSAGE

        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the scratch file even when rendering or reading failed.
            if os.path.exists(report_path):
                os.remove(report_path)

        return f"\n{html_content}\n"

    def generate_autoviz_report(self, df):
        """Return the combined AutoViz charts for *df* as a string.

        Returns a plain prompt message when *df* is ``None``, a "no
        visualizations" message when AutoViz produced no usable HTML, and an
        error message (with the exception text HTML-escaped) when anything
        inside the analysis raises.
        """
        if df is None:
            return _UPLOAD_FIRST_MESSAGE

        # Start from an empty output directory on every run.
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        shutil.rmtree(viz_temp_dir, ignore_errors=True)
        os.makedirs(viz_temp_dir, exist_ok=True)

        try:
            if len(df) > _MAX_ROWS_ANALYZED:
                df = df.sample(n=_MAX_ROWS_ANALYZED, random_state=42)
            df = _coerce_numeric_columns(df)

            plt.close('all')  # Close any existing plots

            # The original call also passed ignore_warnings, sampling and
            # sample_size. Those are not among the arguments listed in
            # AutoViz's documentation (TODO confirm against the installed
            # version); rows are already sampled above and warnings are
            # filtered module-wide, so they are omitted.
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=1,  # Set to 1 to see progress
                lowess=False,
                chart_format='html',
                max_rows_analyzed=_MAX_ROWS_ANALYZED,
                max_cols_analyzed=_MAX_COLS_ANALYZED,
                save_plot_dir=viz_temp_dir,
            )

            html_parts = _read_html_files(viz_temp_dir)
            if not html_parts:
                return _NO_VISUALIZATIONS_HTML

            # Joined outside the f-string: a quoted newline inside an
            # f-string expression is not valid syntax.
            joined_parts = '\n'.join(html_parts)
            return (
                "\n\nAutoViz Analysis Report\n\n"
                f"Analysis of {len(df)} rows and {len(df.columns)} columns\n\n\n"
                f"{joined_parts}\n"
            )
        except Exception as e:
            # Escape the message: it is rendered as HTML and may contain
            # text taken from the uploaded data (e.g. column names).
            return (
                "\n\nError in AutoViz Analysis\n\n"
                f"Error details: {html.escape(str(e))}\n\n"
                "Troubleshooting steps:\n\n"
            )
        finally:
            shutil.rmtree(viz_temp_dir, ignore_errors=True)


def create_interface():
    """Build and return the Gradio Blocks app (upload, Sweetviz, AutoViz tabs)."""
    analyzer = DataAnalyzer()

    def load_data(file):
        """Read the uploaded CSV; return ``(preview, full_frame)`` or ``(None, None)``."""
        if file is None:
            return None, None
        # Accept either an object exposing ``.name`` or a plain path string.
        path = getattr(file, "name", file)
        try:
            df = pd.read_csv(path)
        except Exception as e:
            # Best-effort: clear the preview and state, but leave a trace.
            print(f"Error loading CSV: {str(e)}")
            return None, None
        return df.head(), df

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        # Store the dataframe in a state variable
        current_df = gr.State(None)

        with gr.Tabs():
            # First Tab: Data Upload & Preview
            with gr.TabItem("Data Upload & Preview"):
                with gr.Row():
                    file_input = gr.File(label="Upload CSV")
                data_preview = gr.Dataframe(label="Data Preview", interactive=False)

                file_input.change(
                    fn=load_data,
                    inputs=[file_input],
                    outputs=[data_preview, current_df],
                )

            # Second Tab: Sweetviz Analysis
            with gr.TabItem("Sweetviz Analysis"):
                with gr.Row():
                    sweetviz_button = gr.Button("Generate Sweetviz Report")
                sweetviz_output = gr.HTML(label="Sweetviz Report")

                # The analyzer method already handles a missing dataset.
                sweetviz_button.click(
                    fn=analyzer.generate_sweetviz_report,
                    inputs=[current_df],
                    outputs=[sweetviz_output],
                )

            # Third Tab: AutoViz Analysis
            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    autoviz_button = gr.Button("Generate AutoViz Report")
                autoviz_output = gr.HTML(label="AutoViz Report")

                # The analyzer method already handles a missing dataset.
                autoviz_button.click(
                    fn=analyzer.generate_autoviz_report,
                    inputs=[current_df],
                    outputs=[autoviz_output],
                )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(show_error=True)