"""Gradio dashboard that profiles an uploaded CSV with Sweetviz and AutoViz."""

import os
import shutil
import tempfile
import warnings
import weakref

# NOTE: ce, umap and StandardScaler are not used by the code visible here;
# they are kept because other parts of the project may rely on them.
import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')


class DataAnalyzer:
    """Build HTML reports (Sweetviz, AutoViz) for a pandas DataFrame.

    Intermediate files are written below a private temporary directory
    created in ``__init__``.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()
        self.df = None
        self.AV = AutoViz_Class()
        # mkdtemp() never cleans up after itself; remove the directory when
        # this object is garbage-collected or the interpreter exits.
        self._finalizer = weakref.finalize(
            self, shutil.rmtree, self.temp_dir, ignore_errors=True
        )

    def generate_sweetviz_report(self, df: pd.DataFrame) -> str:
        """Run Sweetviz on *df* and return the report as an HTML string.

        The DataFrame is also remembered on ``self.df``. The report is
        written to a temporary file, read back, and the file is deleted
        even if writing or reading fails.
        """
        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            if os.path.exists(report_path):
                os.remove(report_path)

        # NOTE(review): the wrapper only adds surrounding newlines; if extra
        # markup (e.g. an iframe) was intended here, confirm and restore it.
        html_with_table = f"\n{html_content}\n"
        return html_with_table

    def generate_autoviz_report(self, df: pd.DataFrame) -> str:
        """Run AutoViz on *df* and return its HTML charts as one string.

        Charts are rendered into a scratch directory that is recreated for
        every call and removed afterwards. All non-empty ``.html`` files
        found there are concatenated in sorted path order. If nothing was
        produced, or AutoViz raises, a human-readable message is returned
        instead of raising.
        """
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            plt.close('all')  # Close any existing plots
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,  # Pass DataFrame directly
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=5000,
                max_cols_analyzed=30,
                save_plot_dir=viz_temp_dir,
            )

            # AutoViz may place its charts in a subfolder of save_plot_dir
            # (presumably named after the target or "AutoViz" - TODO
            # confirm), so search recursively rather than only the top level.
            html_files = sorted(
                os.path.join(root, name)
                for root, _dirs, names in os.walk(viz_temp_dir)
                for name in names
                if name.endswith('.html')
            )

            html_parts = []
            for file_path in html_files:
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except Exception as e:
                    # Best effort: one unreadable chart should not discard
                    # the others.
                    file = os.path.basename(file_path)
                    print(f"Error reading file {file}: {str(e)}")
                    continue
                if content.strip():
                    html_parts.append(content)

            if not html_parts:
                return "No visualizations were generated. Please check your data."

            combined_html = "\n\n\n".join(html_parts)
            return combined_html
        except Exception as e:
            return f"Error in AutoViz: {str(e)}"
        finally:
            if os.path.exists(viz_temp_dir):
                shutil.rmtree(viz_temp_dir)


def create_interface() -> gr.Blocks:
    """Assemble the Gradio Blocks app and wire the upload callback.

    Uploading a CSV fills the data preview, the Sweetviz report, the AutoViz
    report and the list of categorical columns.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        # Define all outputs first
        data_preview = gr.Dataframe(label="Data Preview")
        report_html = gr.HTML(label="Sweetviz Report")
        autoviz_html = gr.HTML(label="AutoViz Report")
        column_dropdown = gr.Dropdown(
            label="Select Categorical Column",
            choices=[],
            interactive=True,
        )

        with gr.Tabs():
            with gr.TabItem("Data Upload & Preview"):
                file_input = gr.File(label="Upload CSV")

            with gr.TabItem("AutoViz Analysis"):
                gr.Markdown(
                    "### AutoViz Analysis\n"
                    "Automatic visualization of your dataset"
                )

            with gr.TabItem("Categorical Analysis"):
                # These two components are created but not connected to any
                # callback in this function.
                encoder_dropdown = gr.Dropdown(
                    label="Select Encoder",
                    choices=['binary', 'onehot', 'catboost', 'count'],
                    value='binary',
                    interactive=True,
                )
                plot_output = gr.Image(label="UMAP Visualization")

        def process_file(file):
            """Load the uploaded CSV and build every output for the UI.

            Returns a 4-tuple matching ``outputs`` below: preview rows,
            Sweetviz HTML, AutoViz HTML and an updated column dropdown.
            Errors are reported in the two HTML outputs rather than raised.
            """
            if file is None:
                return None, None, None, gr.Dropdown(choices=[])

            try:
                # Accept both an object exposing ``.name`` and a plain path
                # string, since what gr.File passes depends on the Gradio
                # version and the component's ``type`` setting.
                csv_path = getattr(file, "name", file)
                df = pd.read_csv(csv_path)
                preview = df.head()
                sweetviz_report = analyzer.generate_sweetviz_report(df)
                autoviz_report = analyzer.generate_autoviz_report(df)
                cat_columns = df.select_dtypes(
                    include=['object', 'category']
                ).columns.tolist()
                return (
                    preview,
                    sweetviz_report,
                    autoviz_report,
                    gr.Dropdown(choices=cat_columns),
                )
            except Exception as e:
                return (
                    None,
                    f"Error: {str(e)}",
                    f"Error: {str(e)}",
                    gr.Dropdown(choices=[]),
                )

        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=[data_preview, report_html, autoviz_html, column_dropdown],
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(show_error=True)