# csv-plus-plus / app.py
# baconnier's picture
# Update app.py
# 830b865 verified
# raw
# history blame
# 2.54 kB
import os
import shutil
import tempfile
import weakref

import gradio as gr
import pandas as pd
import sweetviz as sv
class DataAnalyzer:
    """Generate Sweetviz HTML reports for pandas DataFrames.

    Each instance owns a private scratch directory (``temp_dir``) in which
    the intermediate report files are written. The directory is removed
    automatically when the instance is garbage-collected or when the
    interpreter exits.
    """

    def __init__(self):
        # Private scratch directory for the intermediate HTML files.
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp() never cleans up after itself, so register a finalizer
        # that deletes the directory once this instance goes away (or at
        # interpreter shutdown). The finalizer holds no reference to self.
        self._finalizer = weakref.finalize(
            self, shutil.rmtree, self.temp_dir, ignore_errors=True
        )

    def generate_sweetviz_report(self, df):
        """Return the Sweetviz analysis of *df* as an HTML string.

        Args:
            df: The DataFrame handed to ``sv.analyze``.

        Returns:
            The full text of the HTML report that Sweetviz wrote.

        Raises:
            Whatever ``sv.analyze`` / ``show_html`` or reading the file
            raises; the temporary file is removed in every case.
        """
        report = sv.analyze(df)

        # Use a unique file per call: a single fixed file name inside the
        # shared directory would let overlapping calls overwrite or delete
        # each other's report.
        fd, report_path = tempfile.mkstemp(suffix=".html", dir=self.temp_dir)
        os.close(fd)  # Sweetviz opens the path itself; only the name is needed.
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, "r", encoding="utf-8") as f:
                return f.read()
        finally:
            # Best-effort cleanup that also runs when rendering or reading
            # failed; a missing file must not mask the original error.
            try:
                os.remove(report_path)
            except OSError:
                pass
def _summarize_dataframe(df):
    """Return a JSON-friendly summary dict describing *df*.

    Numeric values are converted to built-in ``int``/``float`` because the
    pandas aggregations return NumPy scalars, which are not guaranteed to
    be JSON-serialisable.
    """
    memory_bytes = float(df.memory_usage(deep=True).sum())
    return {
        "Rows": len(df),
        "Columns": len(df.columns),
        "Memory Usage (MB)": round(memory_bytes / 1024**2, 2),
        "Missing Values": int(df.isnull().sum().sum()),
        "Column Types": df.dtypes.astype(str).to_dict(),
    }


def create_interface():
    """Build the Gradio dashboard and return the ``gr.Blocks`` app.

    The layout has a CSV upload and a JSON summary side by side, followed
    by an HTML component that displays the Sweetviz report. Uploading (or
    clearing) the file triggers ``process_file``.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        with gr.Row():
            file_input = gr.File(label="Upload CSV")
            dataset_info = gr.JSON(label="Dataset Information")

        # Create a larger HTML viewer for the report
        # NOTE(review): whether gr.HTML accepts `height` depends on the
        # installed Gradio version - confirm against the pinned version.
        report_html = gr.HTML(label="Analysis Report", height=800)

        def process_file(file):
            """Analyse the uploaded CSV.

            Args:
                file: The value delivered by the ``gr.File`` component, or
                    ``None`` when the upload was cleared.

            Returns:
                A ``(info, report)`` pair for the JSON and HTML outputs.
                Both are ``None`` when no file is present; on failure the
                pair carries the error message instead.
            """
            if file is None:
                return None, None

            try:
                # Accept both an object exposing `.name` and a plain path.
                csv_path = getattr(file, "name", file)
                df = pd.read_csv(csv_path)

                # Convert 'value' column to numeric if possible. The column
                # is optional: without this guard every CSV lacking a
                # 'value' column failed with a KeyError.
                if "value" in df.columns:
                    df["value"] = pd.to_numeric(df["value"], errors="coerce")

                info = _summarize_dataframe(df)

                # Generate Sweetviz report
                report = analyzer.generate_sweetviz_report(df)
                return info, report
            except Exception as e:
                # UI boundary: report any failure in the outputs rather
                # than letting the event handler raise.
                return {"error": str(e)}, f"Error generating report: {str(e)}"

        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=[dataset_info, report_html],
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard, then start serving it.
    demo = create_interface()
    launch_options = {
        "height": 1000,  # Increased height for better report visibility
        "show_error": True,
    }
    demo.launch(**launch_options)