"""Gradio dashboard for exploratory analysis of an uploaded CSV file.

Tabs:
  * Sweetviz Analysis    - Sweetviz HTML report of the whole frame.
  * AutoViz Analysis     - AutoViz HTML charts, concatenated into one page.
  * Categorical Analysis - UMAP projection of one encoded categorical column.
"""

import atexit
import io
import os
import shutil
import tempfile

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler

# Encoder name (as shown in the UI dropdown) -> encoder class.  Classes rather
# than instances are stored so only the requested encoder is constructed.
_ENCODER_CLASSES = {
    'binary': ce.BinaryEncoder,
    'onehot': ce.OneHotEncoder,
    'catboost': ce.CatBoostEncoder,
    'count': ce.CountEncoder,
}

# Upper bound for the height of an embedded report before it scrolls.
_REPORT_MAX_HEIGHT_PX = 800


def _wrap_scrollable(html_content):
    """Return *html_content* wrapped in a height-limited, scrollable div."""
    return f"""
<div style="max-height: {_REPORT_MAX_HEIGHT_PX}px; overflow: auto;">
{html_content}
</div>
"""


class DataAnalyzer:
    """Generate HTML reports and encoding plots for a pandas DataFrame.

    The most recently analysed frame is kept on ``self.df`` so that
    ``encode_and_visualize`` can be called later with just a column name.
    """

    def __init__(self):
        # Scratch directory for report files; removed when the process exits
        # so repeated runs do not pile up temp directories.
        self.temp_dir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.df = None
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report as an HTML string.

        Also stores *df* on ``self.df`` for later categorical analysis.

        Args:
            df: DataFrame to analyse.

        Returns:
            The Sweetviz report HTML wrapped in a scrollable div.
        """
        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the intermediate file even if writing/reading failed.
            if os.path.exists(report_path):
                os.remove(report_path)
        return _wrap_scrollable(html_content)

    def generate_autoviz_report(self, df):
        """Generate AutoViz report and return the HTML content.

        Args:
            df: DataFrame to analyse.

        Returns:
            All generated HTML charts concatenated and wrapped in a
            scrollable div, or an ``"Error generating AutoViz report: ..."``
            string if anything fails.
        """
        # Start from an empty output directory so stale charts from a
        # previous run are never mixed into this report.
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=150000,
                save_plot_dir=viz_temp_dir,
            )

            # Collect every .html file below the output directory.
            # NOTE(review): AutoViz may write charts into a subfolder of
            # save_plot_dir; walking the tree covers both layouts - confirm.
            chunks = []
            for root, dirs, files in os.walk(viz_temp_dir):
                dirs.sort()  # deterministic traversal order
                for file_name in sorted(files):
                    if file_name.endswith('.html'):
                        path = os.path.join(root, file_name)
                        with open(path, 'r', encoding='utf-8') as f:
                            chunks.append(f.read())
            html_content = "".join(chunk + "\n\n\n" for chunk in chunks)

            # Wrap the content in a scrollable div
            return _wrap_scrollable(html_content)
        except Exception as e:
            # UI boundary: report the failure in the tab instead of raising.
            return f"Error generating AutoViz report: {str(e)}"
        finally:
            # Clean up
            if os.path.exists(viz_temp_dir):
                shutil.rmtree(viz_temp_dir)

    def encode_and_visualize(self, column_name, encoder_type='binary'):
        """Encode one column, project it with UMAP and render a scatter plot.

        Args:
            column_name: Column of ``self.df`` to encode.
            encoder_type: One of ``'binary'``, ``'onehot'``, ``'catboost'``
                or ``'count'``.

        Returns:
            An ``io.BytesIO`` positioned at offset 0 holding the PNG image,
            or ``None`` when no frame is loaded or the column is missing.

        Raises:
            ValueError: If *encoder_type* is not a known encoder name.
        """
        if self.df is None or column_name not in self.df.columns:
            return None

        encoder_cls = _ENCODER_CLASSES.get(encoder_type)
        if encoder_cls is None:
            raise ValueError(
                f"Unknown encoder type {encoder_type!r}; "
                f"expected one of {sorted(_ENCODER_CLASSES)}"
            )

        df_subset = self.df[[column_name]].copy()
        encoded_df = encoder_cls().fit_transform(df_subset)
        scaled_data = StandardScaler().fit_transform(encoded_df)

        reducer = umap.UMAP(
            n_neighbors=15,
            min_dist=0.1,
            n_components=2,
            random_state=42,
        )
        embedding = reducer.fit_transform(scaled_data)

        fig = plt.figure(figsize=(10, 6))
        try:
            scatter = plt.scatter(
                embedding[:, 0],
                embedding[:, 1],
                # Colour each point by the integer code of its category.
                c=pd.factorize(df_subset[column_name])[0],
                cmap='viridis',
                alpha=0.6,
            )
            plt.colorbar(scatter)
            plt.title(f'UMAP visualization of {column_name}\nusing {encoder_type} encoding')
            plt.xlabel('UMAP1')
            plt.ylabel('UMAP2')

            buf = io.BytesIO()
            plt.savefig(buf, format='png', bbox_inches='tight')
        finally:
            # Always release the figure, even when rendering fails.
            plt.close(fig)
        buf.seek(0)
        return buf


def create_interface():
    """Build the Gradio Blocks app and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` instance, ready for ``launch()``.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        with gr.Tabs():
            with gr.TabItem("Sweetviz Analysis"):
                file_input = gr.File(label="Upload CSV")
                report_html = gr.HTML()

            with gr.TabItem("AutoViz Analysis"):
                autoviz_html = gr.HTML()

            with gr.TabItem("Categorical Analysis"):
                with gr.Row():
                    column_dropdown = gr.Dropdown(
                        label="Select Categorical Column",
                        choices=[],
                        interactive=True,
                    )
                    encoder_dropdown = gr.Dropdown(
                        label="Select Encoder",
                        choices=list(_ENCODER_CLASSES),
                        value='binary',
                        interactive=True,
                    )
                plot_output = gr.Image(label="UMAP Visualization")

        def process_file(file):
            """Load the uploaded CSV and build both reports.

            Returns a 3-tuple: Sweetviz HTML, AutoViz HTML and a dropdown
            update listing the frame's object/category columns.
            """
            if file is None:
                return None, None, gr.Dropdown(choices=[])

            try:
                # The upload may arrive as a path string or as an object
                # exposing the path on ``.name``; accept both.
                csv_path = getattr(file, "name", file)
                df = pd.read_csv(csv_path)
                cat_columns = df.select_dtypes(
                    include=['object', 'category']
                ).columns.tolist()

                # Generate both reports
                sweetviz_report = analyzer.generate_sweetviz_report(df)
                autoviz_report = analyzer.generate_autoviz_report(df)

                return (
                    sweetviz_report,
                    autoviz_report,
                    gr.Dropdown(choices=cat_columns),
                )
            except Exception as e:
                # UI boundary: show the message in the first tab.
                return f"Error: {str(e)}", None, gr.Dropdown(choices=[])

        def update_plot(column, encoder_type):
            """Render the UMAP plot for *column* and return a PNG file path.

            Returns ``None`` (clearing the image) when no column is selected
            or the column is not in the loaded frame.
            """
            if not column:
                return None

            try:
                buf = analyzer.encode_and_visualize(column, encoder_type)
            except Exception as e:
                # Surface the failure in the UI instead of silently showing
                # an empty image.
                raise gr.Error(f"Could not build the UMAP plot: {e}") from e

            if buf is None:
                return None

            # gr.Image takes a file path (or ndarray / PIL image), not a
            # BytesIO, so persist the PNG inside the analyzer's temp dir.
            with tempfile.NamedTemporaryFile(
                suffix=".png", dir=analyzer.temp_dir, delete=False
            ) as png_file:
                png_file.write(buf.getvalue())
            return png_file.name

        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=[report_html, autoviz_html, column_dropdown],
        )

        column_dropdown.change(
            fn=update_plot,
            inputs=[column_dropdown, encoder_dropdown],
            outputs=[plot_output],
        )

        encoder_dropdown.change(
            fn=update_plot,
            inputs=[column_dropdown, encoder_dropdown],
            outputs=[plot_output],
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(show_error=True)