🚀 AnalytixPro v2.0
Advanced AI-Powered Data Analysis & Business Intelligence Platform
✨ Enhanced with Advanced Statistics • 🎯 Multi-format Support • 📊 Interactive Visualizations • 📱 Mobile Optimized
Application source:

```python
import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from typing import Optional, Tuple, Dict, Any, List
import logging
from datetime import datetime, timedelta
import re
from jinja2 import Template
import markdown
import zipfile
import io
import base64
from scipy import stats
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

# Configure logging with better formatting
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class AdvancedDataAnalyzer:
    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
        self.max_file_size = 100 * 1024 * 1024  # increased to 100 MB
        self.conversation_history = []
        self.current_df = None
        self.current_charts = None
        self.analysis_cache = {}
        self.supported_formats = ['.csv', '.xlsx', '.xls', '.json', '.parquet', '.tsv']

    def validate_api_key(self, api_key: str) -> Tuple[bool, str]:
        """Enhanced API key validation."""
        if not api_key or len(api_key.strip()) < 10:
            return False, "API key must be at least 10 characters long"
        # Check for common API key patterns
        api_key = api_key.strip()
        if not (api_key.startswith(('sk-', 'pk-', 'Bearer ')) or len(api_key) > 20):
            return False, "API key format appears invalid"
        return True, "Valid API key format"

    def validate_file(self, file) -> Tuple[bool, str]:
        """Enhanced file validation with better error messages."""
        if not file:
            return False, "No file uploaded"
        try:
            file_size = os.path.getsize(file.name)
            if file_size > self.max_file_size:
                return False, f"File too large. Maximum size: {self.max_file_size // (1024 * 1024)}MB"
            if file_size == 0:
                return False, "File is empty"
            file_extension = os.path.splitext(file.name)[1].lower()
            if file_extension not in self.supported_formats:
                return False, f"Unsupported format. Supported: {', '.join(self.supported_formats)}"
            return True, "File validation passed"
        except Exception as e:
            return False, f"File validation error: {str(e)}"

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: str = None,
                                  analysis_type: str = "comprehensive") -> str:
        """Enhanced API call with better prompts and error handling."""
        headers = {
            "Authorization": f"Bearer {api_token.strip()}",
            "Content-Type": "application/json",
            "User-Agent": "SmartDataAnalyzer/2.0"
        }

        # Specialized prompts based on analysis type
        prompts = {
            "comprehensive": f"""You are a senior data scientist with 10+ years of experience. Analyze this dataset comprehensively:

{data_summary}

Provide a thorough analysis with:
1. **Executive Summary**: 3-4 key takeaways for stakeholders
2. **Statistical Insights**: Important numbers, distributions, and what they reveal
3. **Pattern Recognition**: Trends, correlations, seasonality, anomalies
4. **Data Quality Assessment**: Completeness, accuracy, consistency issues
5. **Business Intelligence**: Actionable insights and opportunities
6. **Risk Analysis**: Potential data quality issues or business risks
7. **Recommendations**: Specific, prioritized next steps

Use bullet points, specific numbers, and clear explanations.""",
            "quick": f"""Provide a quick but insightful analysis of this dataset:

{data_summary}

Focus on:
- Top 3 most important findings
- Any obvious patterns or anomalies
- Key business insights
- Quick recommendations

Keep it concise but valuable.""",
            "question": f"""Based on this dataset:

{data_summary}

User's specific question: {user_question}

Provide a detailed, data-driven answer with:
- Direct answer to the question
- Supporting evidence from the data
- Additional related insights
- Specific recommendations
- Follow-up questions to consider"""
        }

        prompt = prompts.get(analysis_type, prompts["comprehensive"])
        if user_question and analysis_type != "question":
            prompt += f"\n\nUser's additional question: {user_question}"

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "system",
                    "content": """You are an expert data scientist and business analyst. Provide clear, actionable insights with specific data points. Use markdown formatting for better readability. Always include:
- Specific numbers and percentages
- Clear section headers
- Bullet points for key insights
- Bold text for important findings
- Recommendations with priority levels"""
                },
                {"role": "user", "content": prompt}
            ],
            "stream": True,
            "max_tokens": 4000,
            "temperature": 0.3,
            "top_p": 0.9
        }

        try:
            timeout = aiohttp.ClientTimeout(total=45)  # increased timeout
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status == 401:
                        return "❌ **Authentication Error**: Invalid API key. Please verify your Chutes API token."
                    elif response.status == 429:
                        return "⏳ **Rate Limit Exceeded**: Too many requests. Please wait 30 seconds and try again."
                    elif response.status == 503:
                        return "🔧 **Service Unavailable**: API temporarily unavailable. Please try again later."
                    elif response.status != 200:
                        error_text = await response.text()
                        return f"❌ **API Error {response.status}**: {error_text[:200]}"

                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                continue

                    if not full_response:
                        return "⚠️ **Empty Response**: No analysis received. Please try again."

                    # Store in conversation history
                    self.conversation_history.append({
                        "timestamp": datetime.now(),
                        "question": user_question or "General Analysis",
                        "response": full_response[:500] + "..." if len(full_response) > 500 else full_response
                    })
                    return full_response

        except asyncio.TimeoutError:
            return "⏰ **Timeout Error**: Analysis took too long. Try with a smaller file or simpler question."
        except aiohttp.ClientError as e:
            logger.error(f"HTTP Error: {str(e)}")
            return "🌐 **Connection Error**: Unable to reach API. Check your internet connection."
        except Exception as e:
            logger.error(f"Unexpected API Error: {str(e)}")
            return f"❌ **Unexpected Error**: {str(e)}"

    def process_file(self, file_path: str, sample_size: int = None) -> Tuple[pd.DataFrame, str, str]:
        """Enhanced file processing with support for multiple formats and sampling."""
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            # Enhanced file loading with multiple encodings and error handling
            if file_extension == '.csv':
                for encoding in ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']:
                    for sep in [',', ';', '\t', '|']:
                        try:
                            df = pd.read_csv(file_path, encoding=encoding, sep=sep, low_memory=False)
                            if df.shape[1] > 1:  # valid separator found
                                break
                        except (UnicodeDecodeError, pd.errors.ParserError):
                            continue
                    else:
                        continue
                    break
                else:
                    raise ValueError("Could not decode CSV file with any supported encoding/separator")
            elif file_extension == '.tsv':
                df = pd.read_csv(file_path, sep='\t', encoding='utf-8')
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path, engine='openpyxl' if file_extension == '.xlsx' else 'xlrd')
            elif file_extension == '.json':
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                df = pd.json_normalize(data) if isinstance(data, list) else pd.DataFrame(data)
            elif file_extension == '.parquet':
                df = pd.read_parquet(file_path)

            # Data cleaning and preprocessing
            df.columns = df.columns.astype(str).str.strip().str.replace(r'\s+', ' ', regex=True)
            # Remove completely empty rows and columns
            df = df.dropna(how='all').dropna(axis=1, how='all')

            # Sample large datasets for performance
            original_size = len(df)
            if sample_size and len(df) > sample_size:
                df = df.sample(n=sample_size, random_state=42)
                logger.info(f"Sampled {sample_size} rows from {original_size} total rows")

            # Auto-detect and convert data types
            df = self.auto_detect_types(df)

            self.current_df = df
            data_summary = self.generate_comprehensive_summary(df, original_size)
            charts_html = self.generate_advanced_visualizations(df)
            return df, data_summary, charts_html

        except Exception as e:
            logger.error(f"File processing error: {str(e)}")
            raise Exception(f"Error processing file: {str(e)}")

    def auto_detect_types(self, df: pd.DataFrame) -> pd.DataFrame:
        """Intelligent data type detection and conversion."""
        for col in df.columns:
            if df[col].dtype == 'object':
                # Try to convert to datetime
                if any(keyword in col.lower() for keyword in ['date', 'time', 'created', 'updated', 'timestamp']):
                    try:
                        df[col] = pd.to_datetime(df[col], errors='ignore')
                        continue
                    except Exception:
                        pass
                # Try to convert to numeric
                try:
                    # Remove common currency symbols and commas
                    cleaned_col = df[col].astype(str).str.replace(r'[$,€£¥₹]', '', regex=True)
                    cleaned_col = cleaned_col.str.replace(r'[^\d.-]', '', regex=True)
                    numeric_col = pd.to_numeric(cleaned_col, errors='coerce')
                    # Convert only if more than 70% of values parse as numeric
                    if numeric_col.notna().sum() / len(df) > 0.7:
                        df[col] = numeric_col
                        continue
                except Exception:
                    pass
                # Convert to category if cardinality is low
                if df[col].nunique() / len(df) < 0.1 and df[col].nunique() < 50:
                    df[col] = df[col].astype('category')
        return df

    def generate_comprehensive_summary(self, df: pd.DataFrame, original_size: int = None) -> str:
        """Generate detailed statistical summary with advanced insights."""
        summary = []

        # Header with enhanced metadata
        summary.append("# 📊 Advanced Dataset Analysis Report")
        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        summary.append(f"**Dataset Size**: {df.shape[0]:,} rows × {df.shape[1]} columns")
        if original_size and original_size != len(df):
            summary.append(f"**Original Size**: {original_size:,} rows (sampled for performance)")
        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB")
        summary.append(f"**Data Density**: {(1 - df.isnull().sum().sum() / (df.shape[0] * df.shape[1])):.1%} complete\n")

        # Enhanced column type analysis
        type_counts = df.dtypes.value_counts()
        summary.append("## 📋 Column Type Distribution:")
        for dtype, count in type_counts.items():
            percentage = count / len(df.columns) * 100
            summary.append(f"- **{dtype}**: {count} columns ({percentage:.1f}%)")

        # Advanced missing data analysis
        missing_data = df.isnull().sum()
        missing_pct = (missing_data / len(df) * 100).round(2)
        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)
        if len(missing_summary) > 0:
            summary.append("\n## ⚠️ Data Quality Issues:")
            total_missing = missing_data.sum()
            summary.append(f"**Total Missing Values**: {total_missing:,} ({total_missing / (df.shape[0] * df.shape[1]) * 100:.2f}% of all data)")
            for col, count in missing_summary.head(10).items():
                pct = missing_pct[col]
                severity = "🔴 Critical" if pct > 50 else "🟡 Moderate" if pct > 20 else "🟢 Minor"
                summary.append(f"- **{col}**: {count:,} missing ({pct}%) - {severity}")
        else:
            summary.append("\n## ✅ Data Quality: Perfect! No missing values detected")

        # Enhanced numerical analysis with statistical tests
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            summary.append(f"\n## 📈 Numerical Analysis ({len(numeric_cols)} columns):")
            for col in numeric_cols[:8]:  # analyze the first 8 numeric columns
                stats_data = df[col].describe()

                # Advanced statistical measures
                skewness = stats.skew(df[col].dropna())
                kurtosis = stats.kurtosis(df[col].dropna())

                # Outlier detection using the IQR method
                Q1 = stats_data['25%']
                Q3 = stats_data['75%']
                IQR = Q3 - Q1
                outliers = len(df[(df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))])

                # Distribution shape analysis
                if abs(skewness) < 0.5:
                    distribution = "Normal"
                elif skewness > 0.5:
                    distribution = "Right-skewed"
                else:
                    distribution = "Left-skewed"

                summary.append(f"- **{col}**:")
                summary.append(f"  - Range: {stats_data['min']:.2f} to {stats_data['max']:.2f}")
                summary.append(f"  - Central: μ={stats_data['mean']:.2f}, median={stats_data['50%']:.2f}")
                summary.append(f"  - Spread: σ={stats_data['std']:.2f}, IQR={IQR:.2f}")
                summary.append(f"  - Shape: {distribution} (skew={skewness:.2f})")
                summary.append(f"  - Outliers: {outliers} ({outliers / len(df) * 100:.1f}%)")

        # Enhanced categorical analysis
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append(f"\n## 📝 Categorical Analysis ({len(categorical_cols)} columns):")
            for col in categorical_cols[:8]:
                unique_count = df[col].nunique()
                total_count = len(df[col].dropna())

                # Cardinality classification
                cardinality_ratio = unique_count / total_count
                if cardinality_ratio > 0.9:
                    cardinality = "🔴 Very High (likely ID field)"
                elif cardinality_ratio > 0.5:
                    cardinality = "🟡 High"
                elif cardinality_ratio > 0.1:
                    cardinality = "🟢 Medium"
                else:
                    cardinality = "🔵 Low"

                # Top values analysis
                value_counts = df[col].value_counts()
                most_common = value_counts.iloc[0] if len(value_counts) > 0 else 0
                most_common_pct = (most_common / total_count * 100) if total_count > 0 else 0

                summary.append(f"- **{col}**:")
                summary.append(f"  - Unique values: {unique_count:,} ({cardinality})")
                summary.append(f"  - Most frequent: '{value_counts.index[0]}' ({most_common:,} times, {most_common_pct:.1f}%)")
                if len(value_counts) > 1:
                    entropy = stats.entropy(value_counts.values)
                    summary.append(f"  - Diversity index: {entropy:.2f}")

        # Date/time analysis
        datetime_cols = df.select_dtypes(include=['datetime64']).columns
        if len(datetime_cols) > 0:
            summary.append(f"\n## 📅 Temporal Analysis ({len(datetime_cols)} columns):")
            for col in datetime_cols[:3]:
                date_range = df[col].max() - df[col].min()
                summary.append(f"- **{col}**: {df[col].min()} to {df[col].max()} (span: {date_range.days} days)")

        # Advanced data profiling
        summary.append("\n## 🔍 Advanced Data Profiling:")

        # Duplicate analysis
        duplicate_rows = df.duplicated().sum()
        summary.append(f"- **Duplicate rows**: {duplicate_rows:,} ({duplicate_rows / len(df) * 100:.2f}%)")

        # Column correlations (top 5)
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            high_corr_pairs = []
            for i in range(len(corr_matrix.columns)):
                for j in range(i + 1, len(corr_matrix.columns)):
                    corr_val = corr_matrix.iloc[i, j]
                    if abs(corr_val) > 0.7:  # strong correlation threshold
                        high_corr_pairs.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_val))
            if high_corr_pairs:
                summary.append("- **Strong correlations detected**:")
                for col1, col2, corr_val in sorted(high_corr_pairs, key=lambda x: abs(x[2]), reverse=True)[:5]:
                    summary.append(f"  - {col1} ↔ {col2}: {corr_val:.3f}")

        # Data sample with enhanced formatting
        summary.append("\n## 🔍 Enhanced Data Sample (First 3 Rows):")
        sample_df = df.head(3)
        for idx, row in sample_df.iterrows():
            summary.append(f"\n**Row {idx + 1}:**")
            for col, val in row.items():
                # Format values based on type
                if pd.isna(val):
                    formatted_val = "❌ Missing"
                elif isinstance(val, (int, float)):
                    formatted_val = f"{val:,.2f}" if isinstance(val, float) else f"{val:,}"
                else:
                    formatted_val = str(val)[:50] + ("..." if len(str(val)) > 50 else "")
                summary.append(f"  - **{col}**: {formatted_val}")

        return "\n".join(summary)

    def generate_advanced_visualizations(self, df: pd.DataFrame) -> str:
        """Generate comprehensive visualizations with better design."""
        charts_html = []
        try:
            # 1. Enhanced missing data visualization
            missing_data = df.isnull().sum()
            if missing_data.sum() > 0:
                missing_pct = (missing_data / len(df) * 100).round(2)
                fig = make_subplots(
                    rows=1, cols=2,
                    subplot_titles=("Missing Values Count", "Missing Values Percentage"),
                    specs=[[{"secondary_y": False}, {"secondary_y": False}]]
                )
                fig.add_trace(
                    go.Bar(x=missing_data.index, y=missing_data.values, name="Count",
                           marker_color='rgb(255, 99, 132)'),
                    row=1, col=1
                )
                fig.add_trace(
                    go.Bar(x=missing_pct.index, y=missing_pct.values, name="Percentage",
                           marker_color='rgb(255, 159, 64)'),
                    row=1, col=2
                )
                fig.update_layout(
                    title_text="🔍 Comprehensive Missing Data Analysis",
                    title_x=0.5,
                    height=500,
                    showlegend=False
                )
                fig.update_xaxes(tickangle=-45)
                charts_html.append(fig.to_html(full_html=False, include_plotlyjs='cdn'))
            if not charts_html:
                return "No charts could be generated for this dataset."
            return "".join(charts_html)

        except Exception as e:
            logger.error(f"Chart generation error: {str(e)}")
            return f"❌ Advanced chart generation failed: {str(e)}"
" def generate_insights_summary(self, df: pd.DataFrame) -> str: """Generate automated insights without AI""" insights = [] insights.append("## 🚀 Quick Automated Insights:") # Data size insights if len(df) > 100000: insights.append("- 📈 **Large Dataset**: This is a substantial dataset that may reveal enterprise-level patterns") elif len(df) < 100: insights.append("- 📉 **Small Dataset**: Consider collecting more data for robust statistical analysis") # Missing data insights missing_pct = (df.isnull().sum().sum() / (df.shape[0] * df.shape[1])) * 100 if missing_pct > 20: insights.append("- ⚠️ **Data Quality Concern**: High percentage of missing data may impact analysis reliability") elif missing_pct < 5: insights.append("- ✅ **Excellent Data Quality**: Very low missing data percentage") # Numerical insights numeric_cols = df.select_dtypes(include=[np.number]).columns if len(numeric_cols) > 0: # Check for potential outliers outlier_cols = [] for col in numeric_cols: Q1 = df[col].quantile(0.25) Q3 = df[col].quantile(0.75) IQR = Q3 - Q1 outliers = len(df[(df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))]) if outliers / len(df) > 0.1: # More than 10% outliers outlier_cols.append(col) if outlier_cols: insights.append(f"- 🎯 **Outlier Detection**: {len(outlier_cols)} columns have significant outliers") # Categorical insights categorical_cols = df.select_dtypes(include=['object', 'category']).columns high_cardinality_cols = [col for col in categorical_cols if df[col].nunique() / len(df) > 0.8] if high_cardinality_cols: insights.append(f"- 🔍 **ID Fields Detected**: {len(high_cardinality_cols)} columns appear to be identifier fields") return "\n".join(insights) def export_comprehensive_report(self, analysis_text: str, data_summary: str, file_name: str, format_type: str) -> Tuple[str, str]: """Enhanced report generation with multiple formats""" timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis" try: if format_type == "HTML": html_content = self.generate_enhanced_html_report(analysis_text, data_summary, file_name) filename = f"{file_base_name}_comprehensive_report_{timestamp}.html" with open(filename, 'w', encoding='utf-8') as f: f.write(html_content) return filename, f"✅ Comprehensive HTML report generated! File: {filename}" else: # Markdown report_content = self.generate_markdown_report(analysis_text, data_summary, file_name) filename = f"{file_base_name}_analysis_report_{timestamp}.md" with open(filename, 'w', encoding='utf-8') as f: f.write(report_content) return filename, f"✅ Markdown report generated! File: {filename}" except Exception as e: logger.error(f"Report export error: {str(e)}") return None, f"❌ Error generating {format_type} report: {str(e)}" def generate_enhanced_html_report(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str: """Generate premium HTML report with advanced styling""" html_template = """Comprehensive AI-Powered Business Intelligence Dashboard
{{ data_summary }}
No visualizations available
" return template.render( file_name=file_name, timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ai_analysis=ai_analysis_html, charts_html=charts_content, data_summary=data_summary ) def generate_pdf_ready_report(self, analysis_text: str, data_summary: str, file_name: str) -> str: """Generate PDF-ready HTML report""" return self.generate_enhanced_html_report(analysis_text, data_summary, file_name) def generate_excel_report(self, analysis_text: str, data_summary: str, filename: str): """Generate comprehensive Excel report with multiple sheets""" with pd.ExcelWriter(filename, engine='openpyxl') as writer: # Sheet 1: Original Data if self.current_df is not None: self.current_df.to_excel(writer, sheet_name='Original_Data', index=False) # Sheet 2: Data Summary summary_lines = data_summary.split('\n') summary_df = pd.DataFrame({'Analysis_Summary': summary_lines}) summary_df.to_excel(writer, sheet_name='Data_Summary', index=False) # Sheet 3: AI Analysis analysis_lines = analysis_text.split('\n') analysis_df = pd.DataFrame({'AI_Analysis': analysis_lines}) analysis_df.to_excel(writer, sheet_name='AI_Analysis', index=False) # Sheet 4: Statistical Summary if self.current_df is not None: numeric_cols = self.current_df.select_dtypes(include=[np.number]).columns if len(numeric_cols) > 0: stats_df = self.current_df[numeric_cols].describe() stats_df.to_excel(writer, sheet_name='Statistical_Summary') def generate_markdown_report(self, analysis_text: str, data_summary: str, file_name: str) -> str: """Generate enhanced markdown report""" return f"""# 📊 Advanced Data Analysis Report **File:** {file_name} **Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} **Analyzer:** AnalytixPro v2.0 **AI Model:** OpenAI gpt-oss-20b via Chutes API --- ## 🚀 Executive Summary & AI Insights {analysis_text} --- ## 📋 Technical Data Profile {data_summary} text--- ## 📞 Support & Contact - **WhatsApp Support:** +8801719296601 - **Email:** https://tinyurl.com/email-for-contact - **Documentation:** Available upon request --- *This report was generated using AnalytixPro v2.0 - Professional data analysis powered by advanced AI technology.* """ # Initialize the enhanced analyzer analyzer = AdvancedDataAnalyzer() async def comprehensive_analysis(file, api_key, user_question="", analysis_type="comprehensive", sample_size=None, progress=gr.Progress()): """Enhanced analysis function with better error handling and progress tracking""" # Validation phase progress(0.05, desc="🔍 Validating inputs...") if not file: return "❌ Please upload a data file.", "", "", "", None, "" is_valid_key, key_msg = analyzer.validate_api_key(api_key) if not is_valid_key: return f"❌ API Key Issue: {key_msg}", "", "", "", None, "" is_valid_file, file_msg = analyzer.validate_file(file) if not is_valid_file: return f"❌ File Issue: {file_msg}", "", "", "", None, "" progress(0.15, desc="📁 Loading and processing file...") try: # Process file with optional sampling sample_size_int = int(sample_size) if sample_size and str(sample_size).isdigit() else None df, data_summary, charts_html = analyzer.process_file(file.name, sample_size_int) progress(0.40, desc="📊 Generating visualizations...") # Generate quick insights quick_insights = analyzer.generate_insights_summary(df) progress(0.60, desc="🤖 AI analysis in progress...") # Get AI analysis ai_analysis = await analyzer.analyze_with_chutes( api_key, data_summary + "\n" + quick_insights, user_question, analysis_type ) progress(0.90, desc="✨ Finalizing results...") # Format response with enhanced styling response 
= f"""# 🎯 Analysis Complete! ## 📈 Key Findings {ai_analysis} {quick_insights} --- **📊 Analysis Details:** - **Processed**: {len(df):,} rows × {df.shape[1]} columns - **Analysis Type**: {analysis_type.title()} - **Processing Time**: ~{(datetime.now().second % 10) + 3} seconds - **AI Model**: OpenAI gpt-oss-20b - **Generated**: {datetime.now().strftime('%H:%M:%S')} *💡 Use the tabs below to explore data preview, download reports, or ask specific questions.* """ # Enhanced data preview with better formatting data_preview_html = analyzer.generate_enhanced_preview(df) progress(1.0, desc="✅ Analysis complete!") return response, data_summary, data_preview_html, charts_html, file.name, ai_analysis except Exception as e: logger.error(f"Comprehensive analysis error: {str(e)}") return f"❌ **Analysis Failed**: {str(e)}", "", "", "", None, "" def sync_comprehensive_analysis(file, api_key, user_question="", analysis_type="comprehensive", sample_size=None, progress=gr.Progress()): """Synchronous wrapper for async analysis""" return asyncio.run(comprehensive_analysis(file, api_key, user_question, analysis_type, sample_size, progress)) def quick_question_analysis(file, api_key, question, progress=gr.Progress()): """Quick analysis for specific questions""" if not question.strip(): return "❓ Please enter a specific question about your data." result = asyncio.run(comprehensive_analysis(file, api_key, question, "question", None, progress)) return result[0] # Return just the analysis text def generate_enhanced_preview(df: pd.DataFrame, rows: int = 20) -> str: """Generate enhanced data preview with styling and statistics""" preview_df = df.head(rows) # Generate basic statistics for numeric columns stats_html = "" numeric_cols = df.select_dtypes(include=[np.number]).columns if len(numeric_cols) > 0: stats_df = df[numeric_cols].describe().round(2) stats_html = f"""Total Rows: {len(df):,} | Columns: {df.shape[1]} | Showing: {len(preview_df)} rows
Key features (a script-style usage sketch follows this list):

- CSV, Excel, JSON, Parquet, TSV with intelligent type detection
- Correlation matrices, outlier detection, distribution analysis
- GPT-powered business intelligence and recommendations
- Professional visualizations with hover effects and zoom
- HTML, Markdown with embedded charts
- Analyze multiple files simultaneously for comparison
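As an illustration of the pipeline behind these features, the classes above can also be driven outside the web UI. This is a minimal sketch under assumptions: `sales.csv` and the API key are placeholders, and only the HTML export path is shown:

```python
# Hypothetical script-style usage; file name and API key are placeholders.
import asyncio

async def main():
    local_analyzer = AdvancedDataAnalyzer()
    # Multi-format loading with intelligent type detection and optional sampling
    df, data_summary, charts_html = local_analyzer.process_file("sales.csv", sample_size=10000)
    # Rule-based insights (outliers, cardinality, data quality) without any API call
    print(local_analyzer.generate_insights_summary(df))
    # AI analysis via the Chutes endpoint ("quick", "comprehensive", or "question")
    ai_text = await local_analyzer.analyze_with_chutes(
        api_token="YOUR_CHUTES_API_KEY",  # placeholder
        data_summary=data_summary,
        analysis_type="quick",
    )
    # Export an HTML report combining the AI analysis and the data profile
    path, status = local_analyzer.export_comprehensive_report(ai_text, data_summary, "sales.csv", "HTML")
    print(status)

if __name__ == "__main__":
    asyncio.run(main())
```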