csv-plus-plus / app.py
baconnier's picture
Update app.py
7617875 verified
raw
history blame
6.06 kB
import gradio as gr
import pandas as pd
import sweetviz as sv
import tempfile
import os
import category_encoders as ce
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autoviz.AutoViz_Class import AutoViz_Class
import shutil
import warnings
warnings.filterwarnings('ignore')
class DataAnalyzer:
    """Generate HTML analysis reports for a pandas DataFrame.

    Attributes set in ``__init__``:
        temp_dir: scratch directory created with ``tempfile.mkdtemp()``.
        df: slot for a DataFrame; initialised to ``None``.
        AV: ``AutoViz_Class`` instance used by ``generate_autoviz_report``.
    """

    def __init__(self):
        """Create the scratch directory and the AutoViz driver."""
        self.temp_dir = tempfile.mkdtemp()
        self.df = None
        self.AV = AutoViz_Class()

    @staticmethod
    def _read_html_outputs(root_dir):
        """Return the non-blank contents of every ``.html`` file under *root_dir*.

        The tree is walked recursively in sorted order so the result is
        deterministic. Files that cannot be read or decoded as UTF-8 are
        reported on stdout and skipped.
        """
        html_parts = []
        # NOTE(review): AutoViz appears to write its charts into a sub-folder
        # of ``save_plot_dir`` rather than into the directory itself; walking
        # the whole tree handles both layouts. Confirm against the installed
        # AutoViz version.
        for folder, subfolders, filenames in os.walk(root_dir):
            subfolders.sort()  # os.walk honours in-place edits: fixes visit order
            for filename in sorted(filenames):
                if not filename.endswith('.html'):
                    continue
                file_path = os.path.join(folder, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeError) as e:
                    print(f"Error reading file {filename}: {str(e)}")
                    continue
                if content.strip():  # Only add non-empty content
                    html_parts.append(content)
        return html_parts

    def generate_autoviz_report(self, df):
        """Generate AutoViz report with proper error handling.

        Runs AutoViz on *df* with HTML chart output directed to a scratch
        sub-directory of ``self.temp_dir``, then stitches every generated
        HTML file into one scrollable container.

        Args:
            df: the DataFrame handed to AutoViz through its ``dfte`` argument.

        Returns:
            str: the combined report HTML; a plain-text notice when no
            non-empty HTML file was produced; or an HTML error panel when
            AutoViz (or the collection step) raises. Exceptions are never
            propagated to the caller.
        """
        # Function-scope import keeps this block self-contained.
        import html

        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        # Start from an empty directory so charts from a previous run cannot
        # leak into this report.
        shutil.rmtree(viz_temp_dir, ignore_errors=True)
        os.makedirs(viz_temp_dir, exist_ok=True)

        try:
            # Configure AutoViz with safe defaults. Its return value is not
            # needed: only the files it writes are used.
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=5000,  # Limit rows for better performance
                max_cols_analyzed=30,  # Limit columns
                save_plot_dir=viz_temp_dir,
                ignore_warnings=True,
            )

            html_parts = self._read_html_outputs(viz_temp_dir)
            if not html_parts:
                return "No visualizations were generated. The dataset might be too small or contain invalid data."

            # Combine all HTML content inside a fixed-height, scrollable box.
            combined_html = "<br><hr><br>".join(html_parts)
            return (
                "\n"
                '<div style="width: 100%; max-width: 1200px; margin: 0 auto;">\n'
                '<div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">\n'
                '<h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>\n'
                f"{combined_html}\n"
                "</div>\n"
                "</div>\n"
            )
        except Exception as e:
            # Top-level boundary for the UI: turn any failure into a visible
            # panel. The message is escaped because it may echo fragments of
            # the uploaded data (column names, cell values).
            details = html.escape(str(e))
            return (
                "\n"
                '<div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">\n'
                "<h3>Error Generating AutoViz Report</h3>\n"
                f"<p>Error details: {details}</p>\n"
                "<p>Suggestions:</p>\n"
                "<ul>\n"
                "<li>Check if your dataset has valid numerical or categorical columns</li>\n"
                "<li>Ensure your dataset has at least 2 columns and 10 rows</li>\n"
                "<li>Remove any corrupted or invalid data</li>\n"
                "</ul>\n"
                "</div>\n"
            )
        finally:
            # Best-effort cleanup: a failure here must not replace the value
            # being returned above.
            shutil.rmtree(viz_temp_dir, ignore_errors=True)

    # ... (rest of the DataAnalyzer class remains the same)
def create_interface():
# Build the Gradio Blocks UI and return it without launching it.
# The visible part defines a CSV upload/preview tab and an AutoViz tab; the
# placeholder comments below mark further tabs and wiring that are elided
# from this excerpt.
# A single DataAnalyzer is created here and used by the upload callback.
analyzer = DataAnalyzer()
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# Data Analysis Dashboard")
with gr.Tabs():
with gr.TabItem("Data Upload & Preview"):
file_input = gr.File(label="Upload CSV")
data_preview = gr.Dataframe(label="Data Preview")
with gr.TabItem("AutoViz Analysis"):
with gr.Row():
# Receives the string returned by analyzer.generate_autoviz_report().
autoviz_html = gr.HTML()
gr.Markdown("""
### AutoViz Analysis Info
- Generates automatic visualizations
- Analyzes relationships between variables
- Creates distribution plots
- Shows correlation matrices
- Identifies patterns and outliers
""")
# ... (other tabs remain the same)
# Callback for file_input.change. Returns a 4-tuple in the order of the
# `outputs` list registered below:
# (data preview, Sweetviz report, AutoViz report, dropdown update).
def process_file(file):
# No file supplied: blank the three outputs and empty the dropdown choices.
if file is None:
return None, None, None, gr.Dropdown(choices=[])
try:
# `file.name` is used as the path of the uploaded CSV.
df = pd.read_csv(file.name)
# Preview first few rows
preview = df.head()
# Generate reports
# NOTE(review): generate_sweetviz_report is not defined in the visible part
# of DataAnalyzer; presumably it lives in the elided remainder of the class.
sweetviz_report = analyzer.generate_sweetviz_report(df)
autoviz_report = analyzer.generate_autoviz_report(df)
# Get categorical columns
cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
return (
preview,
sweetviz_report,
autoviz_report,
gr.Dropdown(choices=cat_columns)
)
except Exception as e:
# Any failure (read, report generation, dtype selection) is reported as the
# same plain-text message in both report outputs; the preview is cleared.
error_message = f"Error processing file: {str(e)}"
return None, error_message, error_message, gr.Dropdown(choices=[])
# Update file input handler
# NOTE(review): report_html and column_dropdown are not created in the visible
# code; presumably they are defined in the elided tabs above.
file_input.change(
fn=process_file,
inputs=[file_input],
outputs=[data_preview, report_html, autoviz_html, column_dropdown]
)
# ... (rest of the interface remains the same)
return demo
# Script entry point: build the interface and launch it when this file is run
# directly (not when it is imported).
if __name__ == "__main__":
demo = create_interface()
# show_error=True is passed through to Gradio's launch(); presumably it makes
# callback errors visible in the browser (confirm against the Gradio docs).
demo.launch(show_error=True)