"""Smart Data Analyzer: a Gradio app that uploads CSV/Excel files and requests
AI-generated insights from the Chutes chat-completions API."""

import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import os
from typing import Optional, Tuple


class DataAnalyzer:
    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
        """Send data to the Chutes API for analysis."""
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }

        # Create the prompt based on whether this is an initial analysis or a follow-up question
        if user_question:
            prompt = f"""Based on this dataset summary:

{data_summary}

User question: {user_question}

Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:

{data_summary}

Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis

Keep the analysis clear, actionable, and data-driven."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # Lower temperature for more consistent analysis
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                continue

                    return full_response if full_response else "No response received from the model."

        except Exception as e:
            return f"Error connecting to Chutes API: {str(e)}"
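
    # Illustrative only: each streamed line is expected to follow the
    # OpenAI-compatible SSE format, roughly
    #   data: {"choices": [{"delta": {"content": "partial text"}}]}
    # terminated by a "data: [DONE]" sentinel. The loop above concatenates the
    # "content" deltas and stops at that sentinel; the exact chunk shape depends
    # on the provider.
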
    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Process uploaded CSV or Excel file"""
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary

        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}")

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive summary of the dataset"""
        summary = []

        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            null_pct = (null_count / len(df)) * 100
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                mode_values = df[col].mode()
                most_common = mode_values.iloc[0] if len(mode_values) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)


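# Illustrative, non-executed sketch of using DataAnalyzer outside the Gradio UI.
# It assumes a local "sales.csv" file and a CHUTES_API_KEY environment variable,
# both of which are hypothetical examples:
#
#   standalone = DataAnalyzer()
#   df, summary = standalone.process_file("sales.csv")
#   insights = asyncio.run(
#       standalone.analyze_with_chutes(os.environ["CHUTES_API_KEY"], summary)
#   )
#   print(insights)
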
# Initialize the analyzer
analyzer = DataAnalyzer()


async def analyze_data(file, api_key, user_question=""):
    """Main function to analyze uploaded data"""
    if not file:
        return "Please upload a CSV or Excel file.", "", ""

    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    try:
        # Process the uploaded file (gr.File may return a path string or a
        # file-like object with a .name attribute, depending on the Gradio version)
        file_path = file if isinstance(file, str) else file.name
        df, data_summary = analyzer.process_file(file_path)

        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)

        # Format the complete response
        response = f"""## Data Analysis Complete!

### Dataset Overview:
{data_summary}

### AI Insights & Recommendations:
{ai_analysis}
"""

        return response, data_summary, df.head(10).to_html()

    except Exception as e:
        return f"Error: {str(e)}", "", ""


def sync_analyze_data(file, api_key, user_question=""):
    """Synchronous wrapper for the async analyze function"""
    # Bridge Gradio's synchronous callback to the async analysis coroutine
    return asyncio.run(analyze_data(file, api_key, user_question))


# Create the Gradio interface
with gr.Blocks(title="Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
# Smart Data Analyzer
### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
""")

    with gr.Row():
        with gr.Column(scale=1):
            # File upload
            file_input = gr.File(
                label="Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input
            api_key_input = gr.Textbox(
                label="Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input
            question_input = gr.Textbox(
                label="Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("Analyze Data", variant="primary", size="lg")

        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

            # Additional outputs (collapsed by default)
            with gr.Accordion("Data Preview", open=False):
                data_preview = gr.HTML(label="First 10 Rows")

            with gr.Accordion("Raw Data Summary", open=False):
                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section
    gr.Markdown("""
### Tips for Best Results:
- **File Size**: Keep files under 10MB for fastest processing
- **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
- **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
- **Formats**: Supports CSV, XLSX, and XLS files

### Example Questions to Ask:
- "What are the key trends in this sales data?"
- "Which products are underperforming?"
- "Are there any seasonal patterns?"
- "What recommendations do you have based on this data?"
""")


# Launch the application
if __name__ == "__main__":
    app.launch(
        share=True
    )
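
# To run locally (assuming this file is saved as app.py):
#   pip install gradio pandas aiohttp openpyxl   # openpyxl enables .xlsx reading
#   python app.py
# share=True additionally asks Gradio to create a temporary public link;
# set it to False to keep the app reachable only from the local machine.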