"""Gradio dashboard that profiles an uploaded CSV with Sweetviz and AutoViz."""

import atexit
import html
import os
import shutil
import tempfile
import traceback
import warnings

# Third-party imports are kept in their original relative order.
import gradio as gr
import pandas as pd
import sweetviz as sv
import category_encoders as ce
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autoviz.AutoViz_Class import AutoViz_Class

warnings.filterwarnings('ignore')

# Upper bound on rows handed to AutoViz (also passed as max_rows_analyzed).
MAX_ROWS_ANALYZED = 5000
# Upper bound on columns AutoViz is asked to analyze.
MAX_COLS_ANALYZED = 30
# Object columns with fewer distinct values than this become 'category'.
MAX_CATEGORY_CARDINALITY = 50


class DataAnalyzer:
    """Build HTML reports (Sweetviz, AutoViz) for a pandas DataFrame.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Scratch directory for report files; removed at interpreter exit so
        # repeated runs do not accumulate temp directories.
        self.temp_dir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        # Last DataFrame passed to generate_sweetviz_report.
        self.df = None
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report's HTML as a string.

        Args:
            df: DataFrame to analyze, or ``None``.

        Returns:
            The HTML report text (wrapped in leading/trailing newlines), or a
            prompt message when *df* is ``None``.
        """
        if df is None:
            return "Please upload a dataset first"

        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the file even when rendering or reading fails.
            if os.path.exists(report_path):
                os.remove(report_path)

        return f"\n{html_content}\n"

    @staticmethod
    def _prepare_for_autoviz(df):
        """Return a cleaned, possibly down-sampled copy of *df* for AutoViz."""
        df = df.copy()

        # Numeric conversion runs first so numeric-looking strings are not
        # later interpreted as dates. A column is converted only when every
        # value parses; otherwise it is left untouched.
        for col in df.select_dtypes(include=['object']).columns:
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                continue

        # Only the remaining object columns are candidates for date parsing.
        # Feeding numeric columns to pd.to_datetime would reinterpret the
        # numbers as epoch offsets and silently turn them into timestamps.
        for col in df.select_dtypes(include=['object']).columns:
            values = df[col]
            if not values.notna().any():
                continue
            try:
                parsed = pd.to_datetime(values)
            except (ValueError, TypeError, OverflowError):
                continue
            if pd.api.types.is_datetime64_any_dtype(parsed):
                df[col] = parsed

        # Replace each datetime column by its year and month components.
        datetime_columns = df.select_dtypes(
            include=['datetime64', 'datetimetz']
        ).columns
        for col in datetime_columns:
            df[f'{col}_year'] = df[col].dt.year
            df[f'{col}_month'] = df[col].dt.month
            df = df.drop(columns=[col])

        # Low-cardinality strings become categoricals.
        for col in df.select_dtypes(include=['object']).columns:
            if df[col].nunique() < MAX_CATEGORY_CARDINALITY:
                df[col] = df[col].astype('category')

        if len(df) > MAX_ROWS_ANALYZED:
            df = df.sample(n=MAX_ROWS_ANALYZED, random_state=42)
        return df

    @staticmethod
    def _collect_chart_markup(plot_dir):
        """Return the text of every non-empty .html/.svg file under *plot_dir*.

        Sub-directories are searched as well, in sorted order.
        """
        html_parts = []
        for root, dirs, files in os.walk(plot_dir):
            dirs.sort()
            for file in sorted(files):
                if not file.endswith(('.html', '.svg')):
                    continue
                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {file}: {str(e)}")
                    continue
                if content.strip():
                    html_parts.append(content)
        return html_parts

    def generate_autoviz_report(self, df):
        """Run AutoViz on *df* and return an HTML string with the results.

        Args:
            df: DataFrame to analyze, or ``None``.

        Returns:
            A string containing a dataset summary followed by the collected
            chart markup; a summary with a "no visualizations" notice when no
            chart files were found; an error report (message, stack trace,
            dataset info) when anything raises; or a prompt message when *df*
            is ``None``.
        """
        if df is None:
            return "Please upload a dataset first"

        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            df = self._prepare_for_autoviz(df)

            # Print data info for debugging (DataFrame.info prints by itself).
            print("\nDataset Info:")
            df.info()
            print("\nColumn Types:")
            print(df.dtypes)

            plt.close('all')

            # NOTE(review): the AutoViz README suggests charts are only saved
            # to save_plot_dir when verbose=2 -- confirm that verbose=1
            # actually writes files here.
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=1,
                lowess=False,
                chart_format='svg',
                max_rows_analyzed=MAX_ROWS_ANALYZED,
                max_cols_analyzed=MAX_COLS_ANALYZED,
                save_plot_dir=viz_temp_dir,
            )

            html_parts = self._collect_chart_markup(viz_temp_dir)
            # Column names come from the uploaded file; escape them before
            # they are embedded in HTML.
            dtype_text = html.escape(df.dtypes.to_string())

            if not html_parts:
                return (
                    "\n\nData Summary\n\n"
                    f"Total Rows: {len(df)}\n\n"
                    f"Total Columns: {len(df.columns)}\n\n"
                    "Column Types:\n\n"
                    f"{dtype_text}\n\n"
                    "No visualizations were generated\n\n"
                    "This might be due to:\n\n"
                )

            charts = "\n".join(html_parts)
            return (
                "\n\nAutoViz Analysis Report\n\n"
                "Dataset Summary\n\n"
                f"Rows analyzed: {len(df)}\n\n"
                f"Columns: {len(df.columns)}\n\n"
                "Column Types:\n\n"
                f"{dtype_text}\n\n"
                f"{charts}\n"
            )
        except Exception as e:
            # Top-level boundary: report the failure in the UI rather than
            # crashing the callback. Tracebacks contain '<...>' fragments, so
            # everything dynamic is escaped.
            pad = " " * 16
            dtype_text = html.escape(df.dtypes.to_string())
            return (
                "\n\nError in AutoViz Analysis\n\n"
                f"Error details: {html.escape(str(e))}\n\n"
                "Stack trace:\n\n"
                f"{html.escape(traceback.format_exc())}\n\n"
                "Dataset Info:\n\n"
                f"{pad}Rows: {len(df)}\n"
                f"{pad}Columns: {len(df.columns)}\n"
                f"{pad}Types:\n{dtype_text}\n"
                f"{pad}\n"
            )
        finally:
            shutil.rmtree(viz_temp_dir, ignore_errors=True)


def create_interface():
    """Assemble and return the Gradio Blocks app (upload, Sweetviz, AutoViz tabs)."""
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        # Store the dataframe in a state variable
        current_df = gr.State(None)

        with gr.Tabs():
            # First Tab: Data Upload & Preview
            with gr.TabItem("Data Upload & Preview"):
                with gr.Row():
                    file_input = gr.File(label="Upload CSV")
                data_preview = gr.Dataframe(label="Data Preview", interactive=False)

                def load_data(file):
                    """Read the uploaded CSV; return (preview, full frame)."""
                    if file is None:
                        return None, None
                    # Accept either an object exposing .name or a plain path.
                    path = getattr(file, "name", file)
                    try:
                        df = pd.read_csv(path)
                    except Exception as e:
                        # Keep the best-effort behaviour (clear the outputs)
                        # but leave a trace of what went wrong.
                        print(f"Error loading file {path}: {e}")
                        return None, None
                    return df.head(), df

                file_input.change(
                    fn=load_data,
                    inputs=[file_input],
                    outputs=[data_preview, current_df]
                )

            # Second Tab: Sweetviz Analysis
            with gr.TabItem("Sweetviz Analysis"):
                with gr.Row():
                    sweetviz_button = gr.Button("Generate Sweetviz Report")
                sweetviz_output = gr.HTML(label="Sweetviz Report")

                def generate_sweetviz(df):
                    if df is None:
                        return "Please upload a dataset first"
                    return analyzer.generate_sweetviz_report(df)

                sweetviz_button.click(
                    fn=generate_sweetviz,
                    inputs=[current_df],
                    outputs=[sweetviz_output]
                )

            # Third Tab: AutoViz Analysis
            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    autoviz_button = gr.Button("Generate AutoViz Report")
                autoviz_output = gr.HTML(label="AutoViz Report")

                def generate_autoviz(df):
                    if df is None:
                        return "Please upload a dataset first"
                    return analyzer.generate_autoviz_report(df)

                autoviz_button.click(
                    fn=generate_autoviz,
                    inputs=[current_df],
                    outputs=[autoviz_output]
                )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(show_error=True)