Spaces:

baconnier
/

csv-plus-plus

Running

App Files Files Community

csv-plus-plus / app.py

baconnier

Update app.py

7617875 verified 5 months ago

raw

history blame

6.06 kB

	import gradio as gr
	import pandas as pd
	import sweetviz as sv
	import tempfile
	import os
	import category_encoders as ce
	import umap
	import matplotlib.pyplot as plt
	from sklearn.preprocessing import StandardScaler
	from autoviz.AutoViz_Class import AutoViz_Class
	import shutil
	import warnings
	warnings.filterwarnings('ignore')

	class DataAnalyzer:
	    """Hold analysis state for the dashboard and build HTML reports.

	    Attributes set in ``__init__``:
	        temp_dir: scratch directory created with ``tempfile.mkdtemp``.
	        df: placeholder for the current dataframe, initially ``None``.
	        AV: the ``AutoViz_Class`` instance used to render charts.
	    """

	    def __init__(self):
	        # Local import keeps this block self-contained; atexit is stdlib.
	        import atexit

	        self.temp_dir = tempfile.mkdtemp()
	        # mkdtemp never deletes what it creates, so remove the scratch
	        # directory when the interpreter exits.
	        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
	        self.df = None
	        self.AV = AutoViz_Class()

	    def generate_autoviz_report(self, df):
	        """Run AutoViz on *df* and return the charts as one HTML fragment.

	        Args:
	            df: dataframe handed to AutoViz through its ``dfte`` argument.

	        Returns:
	            A string. One of: the combined charts wrapped in a scrollable
	            container; a plain-text notice when no non-empty ``.html``
	            file was produced; or a red error box (with the exception
	            text HTML-escaped) when anything inside the run raised.
	        """
	        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
	        # Start from an empty output directory so files left by an earlier
	        # run cannot end up in this report.
	        if os.path.exists(viz_temp_dir):
	            shutil.rmtree(viz_temp_dir)
	        os.makedirs(viz_temp_dir)

	        try:
	            # Only the files written to save_plot_dir are used; the return
	            # value of AutoViz() is intentionally discarded.
	            # NOTE(review): `ignore_warnings` is kept from the original call,
	            # but confirm the installed AutoViz version accepts it. If it
	            # does not, this call raises TypeError on every run and the
	            # error box below is all the user ever sees.
	            self.AV.AutoViz(
	                filename='',
	                sep=',',
	                depVar='',
	                dfte=df,
	                header=0,
	                verbose=0,
	                lowess=False,
	                chart_format='html',
	                max_rows_analyzed=5000,  # Limit rows for better performance
	                max_cols_analyzed=30,  # Limit columns
	                save_plot_dir=viz_temp_dir,
	                ignore_warnings=True,
	            )

	            html_parts = self._collect_html_parts(viz_temp_dir)
	            if not html_parts:
	                return "No visualizations were generated. The dataset might be too small or contain invalid data."

	            return self._wrap_report("<br><hr><br>".join(html_parts))
	        except Exception as e:
	            # UI boundary: turn any failure into a message the page can show.
	            return self._format_error(e)
	        finally:
	            # Best-effort cleanup; a failure here must not replace the
	            # report (or error box) that is being returned.
	            shutil.rmtree(viz_temp_dir, ignore_errors=True)

	    @staticmethod
	    def _collect_html_parts(root_dir):
	        """Return the non-empty contents of every ``.html`` file under *root_dir*.

	        The directory is walked recursively because the chart files may be
	        written into a subfolder of ``save_plot_dir`` rather than directly
	        into it. Files are read in sorted path order.
	        """
	        html_paths = sorted(
	            os.path.join(dirpath, filename)
	            for dirpath, _dirnames, filenames in os.walk(root_dir)
	            for filename in filenames
	            if filename.endswith('.html')
	        )

	        parts = []
	        for path in html_paths:
	            try:
	                with open(path, 'r', encoding='utf-8') as f:
	                    content = f.read()
	            except (OSError, UnicodeDecodeError) as e:
	                # One unreadable chart should not sink the whole report.
	                print(f"Error reading file {os.path.basename(path)}: {str(e)}")
	                continue
	            if content.strip():  # Only add non-empty content
	                parts.append(content)
	        return parts

	    @staticmethod
	    def _wrap_report(combined_html):
	        """Wrap already-rendered chart HTML in the fixed-height report container."""
	        return f"""
	        <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
	        <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
	        <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
	        {combined_html}
	        </div>
	        </div>
	        """

	    @staticmethod
	    def _format_error(exc):
	        """Render *exc* as the red error box shown in place of the report."""
	        # Local import keeps this block self-contained; html is stdlib.
	        import html

	        # The message can echo text from the uploaded data (column names,
	        # cell values), so it must not be interpreted as markup.
	        details = html.escape(str(exc))
	        return f"""
	        <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
	        <h3>Error Generating AutoViz Report</h3>
	        <p>Error details: {details}</p>
	        <p>Suggestions:</p>
	        <ul>
	        <li>Check if your dataset has valid numerical or categorical columns</li>
	        <li>Ensure your dataset has at least 2 columns and 10 rows</li>
	        <li>Remove any corrupted or invalid data</li>
	        </ul>
	        </div>
	        """

	    # ... (rest of the DataAnalyzer class remains the same)

	def create_interface():
	    """Build the Gradio dashboard and wire the CSV upload to the report generators.

	    Returns:
	        The ``gr.Blocks`` app object; the caller launches it.
	    """
	    # Local import keeps this block self-contained; html is stdlib.
	    import html

	    analyzer = DataAnalyzer()

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("# Data Analysis Dashboard")

	        with gr.Tabs():
	            with gr.TabItem("Data Upload & Preview"):
	                file_input = gr.File(label="Upload CSV")
	                data_preview = gr.Dataframe(label="Data Preview")

	            with gr.TabItem("AutoViz Analysis"):
	                # NOTE(review): the original indentation was lost, so it is
	                # unclear whether the info text belongs inside this Row or
	                # below it — confirm the intended layout.
	                with gr.Row():
	                    autoviz_html = gr.HTML()
	                    gr.Markdown("""
	                    ### AutoViz Analysis Info
	                    - Generates automatic visualizations
	                    - Analyzes relationships between variables
	                    - Creates distribution plots
	                    - Shows correlation matrices
	                    - Identifies patterns and outliers
	                    """)

	            # ... (other tabs remain the same)

	        def process_file(file):
	            """Load the uploaded CSV and produce every output of the change event.

	            Returns a 4-tuple in the order of the ``outputs`` list below:
	            data preview, Sweetviz report, AutoViz report, and a Dropdown
	            whose choices are the object/category column names.
	            """
	            if file is None:
	                # Upload was cleared: blank every output.
	                return None, None, None, gr.Dropdown(choices=[])

	            try:
	                df = pd.read_csv(file.name)

	                # Preview first few rows
	                preview = df.head()

	                # Generate reports
	                sweetviz_report = analyzer.generate_sweetviz_report(df)
	                autoviz_report = analyzer.generate_autoviz_report(df)

	                # Get categorical columns
	                cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()

	                return (
	                    preview,
	                    sweetviz_report,
	                    autoviz_report,
	                    gr.Dropdown(choices=cat_columns),
	                )
	            except Exception as e:
	                # UI boundary: report the failure in the page instead of
	                # raising. The text is rendered by HTML components and can
	                # echo content of the uploaded file, so escape it.
	                error_message = f"Error processing file: {html.escape(str(e))}"
	                return None, error_message, error_message, gr.Dropdown(choices=[])

	        # Update file input handler
	        # NOTE(review): report_html and column_dropdown are not created in
	        # the code shown here; presumably they come from the elided tabs —
	        # confirm they exist before this call.
	        file_input.change(
	            fn=process_file,
	            inputs=[file_input],
	            outputs=[data_preview, report_html, autoviz_html, column_dropdown]
	        )

	        # ... (rest of the interface remains the same)

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the dashboard, then start serving it.
	    # The app stays bound to the name `demo`, as in the original.
	    demo = create_interface()
	    demo.launch(show_error=True)