import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import os
from typing import Optional, Tuple


class DataAnalyzer:
    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(
        self, api_token: str, data_summary: str, user_question: Optional[str] = None
    ) -> str:
        """Send data to the Chutes API for analysis."""
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }

        # Build the prompt: answer a follow-up question if one was asked,
        # otherwise run the initial full analysis.
        if user_question:
            prompt = f"""Based on this dataset summary:

{data_summary}

User question: {user_question}

Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:

{data_summary}

Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis

Keep the analysis clear, actionable, and data-driven."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3,  # Lower temperature for more consistent analysis
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.api_base_url, headers=headers, json=body
                ) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    # Accumulate the streamed completion chunk by chunk.
                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if not line.startswith("data: "):
                            continue
                        data = line[6:]
                        if data == "[DONE]":
                            break
                        try:
                            chunk_data = json.loads(data)
                            if chunk_data.get("choices"):
                                delta = chunk_data["choices"][0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
                                    full_response += content
                        except json.JSONDecodeError:
                            continue

                    return full_response or "No response received from the model."
        except Exception as e:
            return f"Error connecting to Chutes API: {e}"
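
    # For reference, the streaming parser above consumes Server-Sent Events in
    # the OpenAI-compatible chat-completions format. Each payload line looks
    # roughly like this (a sketch; exact fields can vary slightly by provider):
    #
    #   data: {"choices": [{"index": 0, "delta": {"content": "Revenue grew"}}]}
    #   data: [DONE]
    #
    # A quick offline check of the delta extraction used above:
    #
    #   chunk = json.loads('{"choices": [{"delta": {"content": "hi"}}]}')
    #   assert chunk["choices"][0].get("delta", {}).get("content") == "hi"
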
    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load an uploaded CSV or Excel file and summarize it."""
        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == ".csv":
                df = pd.read_csv(file_path)
            elif file_extension in (".xlsx", ".xls"):
                df = pd.read_excel(file_path)
            else:
                raise ValueError(
                    "Unsupported file format. Please upload CSV or Excel files."
                )

            # Generate a comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary
        except Exception as e:
            raise RuntimeError(f"Error processing file: {e}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive text summary of the dataset."""
        summary = []

        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            null_pct = (null_count / len(df)) * 100
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=["number"]).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(
                    f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, "
                    f"Range=[{stats['min']:.2f}, {stats['max']:.2f}]"
                )

        # Categorical columns
        categorical_cols = df.select_dtypes(include=["object", "category"]).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                mode = df[col].mode()
                most_common = mode.iloc[0] if len(mode) > 0 else "N/A"
                summary.append(
                    f"- {col}: {unique_count} unique values, "
                    f"Most common: '{most_common}'"
                )

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)


# Initialize the analyzer
analyzer = DataAnalyzer()


async def analyze_data(file, api_key, user_question=""):
    """Main entry point: analyze an uploaded file, optionally answering a question."""
    if not file:
        return "Please upload a CSV or Excel file.", "", ""

    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    try:
        # gr.File may hand us a filepath string (newer Gradio versions) or a
        # tempfile-like object with a .name attribute (older versions).
        file_path = file if isinstance(file, str) else file.name

        # Process the uploaded file
        df, data_summary = analyzer.process_file(file_path)

        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)

        # Format the complete response
        response = f"""## 📊 Data Analysis Complete!

### 📈 Dataset Overview:
{data_summary}

### 🤖 AI Insights & Recommendations:
{ai_analysis}
"""
        return response, data_summary, df.head(10).to_html()
    except Exception as e:
        return f"Error: {e}", "", ""


def sync_analyze_data(file, api_key, user_question=""):
    """Synchronous wrapper for the async analyze function.

    Gradio also accepts async handlers directly; this wrapper simply runs the
    coroutine on a fresh event loop in Gradio's worker thread.
    """
    return asyncio.run(analyze_data(file, api_key, user_question))
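

# Optional offline sanity check for the summary step (no API key or network
# required). This is an illustrative sketch: the DATA_ANALYZER_SELFTEST flag
# and the toy column names are invented for this example, not part of the app.
if os.environ.get("DATA_ANALYZER_SELFTEST"):
    _demo_df = pd.DataFrame(
        {"region": ["north", "south", "north"], "sales": [120.0, 250.5, 90.25]}
    )
    print(analyzer.generate_data_summary(_demo_df))
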

# Create the Gradio interface
with gr.Blocks(title="📊 Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # 📊 Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # File upload
            file_input = gr.File(
                label="📁 Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single",
            )

            # API key input
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1,
            )

            # Optional question input
            question_input = gr.Textbox(
                label="❓ Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2,
            )

            # Analyze button
            analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")

        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="📋 Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights...",
            )

            # Additional outputs (hidden by default)
            with gr.Accordion("📊 Data Preview", open=False):
                data_preview = gr.HTML(label="First 10 Rows")

            with gr.Accordion("🔍 Raw Data Summary", open=False):
                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview],
    )

    # Example section
    gr.Markdown("""
    ### 💡 Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### 🎯 Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)


# Launch the application
if __name__ == "__main__":
    app.launch(share=True)
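
# Programmatic use without the UI is also possible. A sketch, not app code:
# "sales.csv" and the CHUTES_API_KEY environment variable are placeholders
# you would supply yourself.
#
#   report_md, raw_summary, preview_html = sync_analyze_data(
#       "sales.csv", os.environ["CHUTES_API_KEY"], "Which region performs best?"
#   )
#   print(report_md)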