# csv-plus-plus / app.py
# baconnier's picture
# Update app.py
# 8d8e69e verified
# raw
# history blame
# 5.53 kB
import gradio as gr
import pandas as pd
import sweetviz as sv
import tempfile
import os
import category_encoders as ce
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autoviz.AutoViz_Class import AutoViz_Class
import shutil
import warnings
warnings.filterwarnings('ignore')
class DataAnalyzer:
    """Build HTML reports (Sweetviz and AutoViz) for a pandas DataFrame.

    Instance state:
      temp_dir -- scratch directory created with ``tempfile.mkdtemp()``;
                  every report call works in its own sub-directory of it.
      df       -- the DataFrame most recently passed to
                  ``generate_sweetviz_report`` (``None`` until then).
      AV       -- the ``AutoViz_Class`` instance used for AutoViz runs.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()
        self.df = None
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report as an HTML string.

        The report is written to a temporary file, read back, and wrapped
        in a scrollable table cell. Any exception raised by Sweetviz or by
        file I/O propagates to the caller.
        """
        self.df = df
        report = sv.analyze(df)

        # A private scratch directory per call: it is removed even when
        # show_html() or the read fails, and two overlapping calls can no
        # longer overwrite each other's "report.html".
        os.makedirs(self.temp_dir, exist_ok=True)
        with tempfile.TemporaryDirectory(dir=self.temp_dir) as work_dir:
            report_path = os.path.join(work_dir, "report.html")
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()

        return self._wrap_in_scroll_table(html_content)

    def generate_autoviz_report(self, df):
        """Generate an AutoViz report for *df* and return it as HTML.

        Returns the generated HTML charts joined by horizontal rules, a
        fixed notice when no chart was produced, or an
        ``"Error in AutoViz: ..."`` string when AutoViz raised.
        """
        os.makedirs(self.temp_dir, exist_ok=True)
        # Per-call output directory, deleted on exit whatever happens.
        with tempfile.TemporaryDirectory(dir=self.temp_dir) as viz_temp_dir:
            try:
                plt.close('all')  # Close any existing plots
                self.AV.AutoViz(
                    filename='',
                    sep=',',
                    depVar='',
                    dfte=df,  # Pass DataFrame directly
                    header=0,
                    verbose=0,
                    lowess=False,
                    chart_format='html',
                    max_rows_analyzed=5000,
                    max_cols_analyzed=30,
                    save_plot_dir=viz_temp_dir,
                )
                html_parts = self._collect_html_parts(viz_temp_dir)
                if not html_parts:
                    return "No visualizations were generated. Please check your data."
                return "<br><hr><br>".join(html_parts)
            except Exception as e:
                # Deliberate best-effort: the caller displays this string.
                return f"Error in AutoViz: {str(e)}"

    @staticmethod
    def _wrap_in_scroll_table(html_content):
        """Return *html_content* inside a bordered, 800px-high scroll area."""
        return (
            '\n<table width="100%" style="border-collapse: collapse;">\n'
            '<tr>\n'
            '<td style="padding: 20px; border: 1px solid #ddd;">\n'
            '<div style="height: 800px; overflow: auto;">\n'
            f'{html_content}\n'
            '</div>\n'
            '</td>\n'
            '</tr>\n'
            '</table>\n'
        )

    @staticmethod
    def _collect_html_parts(directory):
        """Return the contents of every non-blank ``.html`` file under *directory*.

        The tree is walked recursively in sorted order, so the result is
        deterministic. A missing directory yields an empty list. Files that
        cannot be read are reported with ``print`` and skipped.
        """
        # NOTE(review): AutoViz appears to save its HTML charts in a
        # sub-folder of save_plot_dir rather than directly in it -- confirm
        # against the AutoViz docs. Walking recursively covers both layouts.
        html_parts = []
        for dirpath, dirnames, filenames in os.walk(directory):
            dirnames.sort()  # in-place sort fixes the traversal order
            for file_name in sorted(filenames):
                if not file_name.endswith('.html'):
                    continue
                file_path = os.path.join(dirpath, file_name)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {file_name}: {str(e)}")
                    continue
                if content.strip():
                    html_parts.append(content)
        return html_parts
def create_interface():
    """Assemble the Gradio dashboard and return the Blocks app, unlaunched.

    One ``DataAnalyzer`` is created and shared by the upload handler.
    Uploading a CSV fills the data preview, both report panes and the
    categorical-column dropdown.
    """
    analyzer = DataAnalyzer()

    def process_file(file):
        """Turn an uploaded CSV into (preview, sweetviz, autoviz, dropdown)."""
        # Nothing uploaded (or upload cleared): blank every output.
        if file is None:
            return None, None, None, gr.Dropdown(choices=[])

        try:
            frame = pd.read_csv(file.name)
            head_rows = frame.head()
            sweetviz_html = analyzer.generate_sweetviz_report(frame)
            autoviz_html_text = analyzer.generate_autoviz_report(frame)
            categorical = frame.select_dtypes(
                include=['object', 'category']
            ).columns.tolist()
        except Exception as e:
            # Show the same message in both report panes.
            message = f"Error: {str(e)}"
            return None, message, message, gr.Dropdown(choices=[])

        return (
            head_rows,
            sweetviz_html,
            autoviz_html_text,
            gr.Dropdown(choices=categorical),
        )

    with gr.Blocks(theme=gr.themes.Soft()) as dashboard:
        gr.Markdown("# Data Analysis Dashboard")

        # Output components are created first, ahead of the tabs.
        data_preview = gr.Dataframe(label="Data Preview")
        report_html = gr.HTML(label="Sweetviz Report")
        autoviz_html = gr.HTML(label="AutoViz Report")
        column_dropdown = gr.Dropdown(
            label="Select Categorical Column",
            choices=[],
            interactive=True,
        )

        with gr.Tabs():
            with gr.TabItem("Data Upload & Preview"):
                file_input = gr.File(label="Upload CSV")

            with gr.TabItem("AutoViz Analysis"):
                gr.Markdown(
                    "\n### AutoViz Analysis\n"
                    "Automatic visualization of your dataset\n"
                )

            with gr.TabItem("Categorical Analysis"):
                encoder_dropdown = gr.Dropdown(
                    label="Select Encoder",
                    choices=['binary', 'onehot', 'catboost', 'count'],
                    value='binary',
                    interactive=True,
                )
                plot_output = gr.Image(label="UMAP Visualization")

        # Re-run the whole analysis whenever the uploaded file changes.
        upload_outputs = [data_preview, report_html, autoviz_html, column_dropdown]
        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=upload_outputs,
        )

    return dashboard
# Script entry point: build the dashboard and start the Gradio server.
if __name__ == "__main__":
    demo = create_interface()
    # show_error=True is passed so handler errors are shown in the UI.
    demo.launch(show_error=True)