import atexit
import io
import os
import shutil
import tempfile

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler
class DataAnalyzer:
    """Build exploratory-analysis artefacts for a pandas DataFrame.

    The instance keeps the most recently analysed frame in ``self.df`` (set by
    ``generate_sweetviz_report``) so that ``encode_and_visualize`` can later
    plot one of its columns. Scratch files are written below ``self.temp_dir``.
    """

    # Public encoder name -> class name inside ``category_encoders``.
    # The class is looked up lazily so only the requested encoder is built and
    # an unknown name can be rejected before any heavy work starts.
    _ENCODER_CLASS_NAMES = {
        'binary': 'BinaryEncoder',
        'onehot': 'OneHotEncoder',
        'catboost': 'CatBoostEncoder',
        'count': 'CountEncoder',
    }

    def __init__(self):
        """Create the scratch directory and the AutoViz driver."""
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp never deletes what it creates; remove it when the
        # interpreter exits so repeated runs do not pile up directories.
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.df = None
        self.AV = AutoViz_Class()

    @staticmethod
    def _wrap_scrollable(html_content):
        """Return *html_content* wrapped in a fixed-height scrollable ``<div>``."""
        # NOTE(review): the wrapped reports are complete HTML documents; if
        # their scripts/styles do not render inside the target component,
        # consider embedding them through an <iframe srcdoc="..."> instead.
        return (
            '<div style="height: 800px; overflow: auto;">\n'
            f'{html_content}\n'
            '</div>'
        )

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report as an HTML string.

        Also stores *df* on the instance as ``self.df``.

        Args:
            df: DataFrame to analyse.

        Returns:
            The generated report HTML wrapped in a scrollable container.
        """
        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # The file is only a hand-off; drop it even if reading failed.
            if os.path.exists(report_path):
                os.remove(report_path)
        return self._wrap_scrollable(html_content)

    def generate_autoviz_report(self, df):
        """Generate AutoViz report and return the HTML content.

        Args:
            df: DataFrame to analyse.

        Returns:
            All generated ``.html`` charts concatenated and wrapped in a
            scrollable container, or an ``"Error generating AutoViz report:
            ..."`` string when anything goes wrong.
        """
        # Start from an empty output directory so charts from an earlier
        # upload cannot leak into this report.
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)
        try:
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=150000,
                save_plot_dir=viz_temp_dir
            )
            # Walk recursively: charts may be written into sub-folders of the
            # requested directory, which a flat listdir would miss.
            html_parts = []
            for root, dirs, files in os.walk(viz_temp_dir):
                dirs.sort()  # deterministic traversal order
                for file_name in sorted(files):
                    if file_name.endswith('.html'):
                        chart_path = os.path.join(root, file_name)
                        with open(chart_path, 'r', encoding='utf-8') as f:
                            html_parts.append(f.read())
            return self._wrap_scrollable("\n".join(html_parts))
        except Exception as e:
            # Callers render the returned string, so report failures as text.
            return f"Error generating AutoViz report: {str(e)}"
        finally:
            shutil.rmtree(viz_temp_dir, ignore_errors=True)

    def encode_and_visualize(self, column_name, encoder_type='binary'):
        """Encode one column, project it to 2-D with UMAP and plot it.

        Args:
            column_name: Column of ``self.df`` to encode.
            encoder_type: One of ``'binary'``, ``'onehot'``, ``'catboost'``
                or ``'count'``.

        Returns:
            An ``io.BytesIO`` positioned at 0 that holds the PNG plot, or
            ``None`` when no frame is loaded or the column does not exist.

        Raises:
            ValueError: If *encoder_type* is not a supported encoder name.
        """
        if self.df is None or column_name not in self.df.columns:
            return None

        encoder_class_name = self._ENCODER_CLASS_NAMES.get(encoder_type)
        if encoder_class_name is None:
            supported = ', '.join(sorted(self._ENCODER_CLASS_NAMES))
            raise ValueError(
                f"Unknown encoder type {encoder_type!r}; expected one of: {supported}"
            )

        df_subset = self.df[[column_name]].copy()
        # NOTE(review): CatBoostEncoder is a target-based encoder and no
        # target is passed here -- confirm the 'catboost' option works.
        encoder = getattr(ce, encoder_class_name)()
        encoded_df = encoder.fit_transform(df_subset)
        scaled_data = StandardScaler().fit_transform(encoded_df)

        reducer = umap.UMAP(
            n_neighbors=15,
            min_dist=0.1,
            n_components=2,
            random_state=42
        )
        embedding = reducer.fit_transform(scaled_data)

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            scatter = ax.scatter(
                embedding[:, 0],
                embedding[:, 1],
                # Colour each point by the integer code of its category.
                c=pd.factorize(df_subset[column_name])[0],
                cmap='viridis',
                alpha=0.6
            )
            fig.colorbar(scatter, ax=ax)
            ax.set_title(f'UMAP visualization of {column_name}\nusing {encoder_type} encoding')
            ax.set_xlabel('UMAP1')
            ax.set_ylabel('UMAP2')
            buf = io.BytesIO()
            fig.savefig(buf, format='png', bbox_inches='tight')
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
        buf.seek(0)
        return buf
def create_interface():
    """Build the Gradio dashboard and return the ``gr.Blocks`` app.

    The app has three tabs: a Sweetviz report, an AutoViz report and a UMAP
    plot of one categorical column. Uploading a CSV regenerates both reports
    and refreshes the list of selectable categorical columns.

    Returns:
        The assembled, not yet launched, ``gr.Blocks`` instance.
    """
    analyzer = DataAnalyzer()
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")
        with gr.Tabs():
            with gr.TabItem("Sweetviz Analysis"):
                file_input = gr.File(label="Upload CSV")
                report_html = gr.HTML()
            with gr.TabItem("AutoViz Analysis"):
                autoviz_html = gr.HTML()
            with gr.TabItem("Categorical Analysis"):
                with gr.Row():
                    column_dropdown = gr.Dropdown(
                        label="Select Categorical Column",
                        choices=[],
                        interactive=True
                    )
                    encoder_dropdown = gr.Dropdown(
                        label="Select Encoder",
                        choices=['binary', 'onehot', 'catboost', 'count'],
                        value='binary',
                        interactive=True
                    )
                plot_output = gr.Image(label="UMAP Visualization")

        def process_file(file):
            """Return (sweetviz_html, autoviz_html, column dropdown update)."""
            if file is None:
                return None, None, gr.Dropdown(choices=[], value=None)
            try:
                # Accept both an upload object exposing ``.name`` and a
                # plain path string.
                csv_path = getattr(file, "name", file)
                df = pd.read_csv(csv_path)
                cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
                # Generate both reports
                sweetviz_report = analyzer.generate_sweetviz_report(df)
                autoviz_report = analyzer.generate_autoviz_report(df)
                return (
                    sweetviz_report,
                    autoviz_report,
                    # Clear the selection so a column name from the previous
                    # file cannot linger in the dropdown.
                    gr.Dropdown(choices=cat_columns, value=None)
                )
            except Exception as e:
                return f"Error: {str(e)}", None, gr.Dropdown(choices=[], value=None)

        def update_plot(column, encoder_type):
            """Return the UMAP plot for *column*, or None when unavailable."""
            if not column:
                return None
            try:
                result = analyzer.encode_and_visualize(column, encoder_type)
                if result is None or not hasattr(result, "read"):
                    return result
                # gr.Image takes a numpy array, PIL image or file path, not a
                # byte buffer, so decode the PNG buffer into a pixel array.
                pixels = plt.imread(result, format='png')
                if pixels.dtype.kind == 'f':
                    # Float pixel data is scaled 0-1; convert to 8-bit.
                    pixels = (pixels * 255).round().astype('uint8')
                return pixels
            except Exception as e:
                # Tell the user instead of failing silently, but keep the UI
                # usable by clearing the image.
                gr.Warning(f"Could not build the UMAP plot: {e}")
                return None

        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=[report_html, autoviz_html, column_dropdown]
        )
        column_dropdown.change(
            fn=update_plot,
            inputs=[column_dropdown, encoder_dropdown],
            outputs=[plot_output]
        )
        encoder_dropdown.change(
            fn=update_plot,
            inputs=[column_dropdown, encoder_dropdown],
            outputs=[plot_output]
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard, then serve it. The app is kept
    # in the module-level name ``demo``.
    demo = create_interface()
    demo.launch(show_error=True)