# Source: Hugging Face Space by shukdevdattaEX — "v1.txt" (commit d513747)
import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import io
import os
from typing import Optional, Tuple
class DataAnalyzer:
    """Load tabular data files, summarize them, and request AI analysis from the Chutes API."""

    def __init__(self):
        # OpenAI-compatible chat-completions endpoint of the Chutes LLM service.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
        """Send data to Chutes API for analysis.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Plain-text dataset summary (see generate_data_summary).
            user_question: Optional follow-up question; when falsy, a generic
                "analyze this dataset" prompt is used instead.

        Returns:
            The model's streamed text, or a human-readable error string
            (this method never raises — errors are returned as strings).
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }
        # Create the prompt based on whether it's initial analysis or follow-up question
        if user_question:
            prompt = f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""
        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # Lower temperature for more consistent analysis
        }
        try:
            # Bounded total timeout so a stalled stream cannot hang the caller forever.
            timeout = aiohttp.ClientTimeout(total=300)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"
                    full_response = ""
                    # Server-sent-events stream: each payload line is prefixed with "data: ".
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if not line.startswith("data: "):
                            continue
                        data = line[6:]
                        if data == "[DONE]":
                            break
                        try:
                            chunk_data = json.loads(data)
                            choices = chunk_data.get("choices") or []
                            if choices:
                                content = choices[0].get("delta", {}).get("content", "")
                                if content:
                                    full_response += content
                        except json.JSONDecodeError:
                            # Ignore malformed keep-alive / partial chunks.
                            continue
                    return full_response if full_response else "No response received from the model."
        except Exception as e:
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load a CSV/XLSX/XLS file and return (DataFrame, text summary).

        Raises:
            Exception: wrapping the underlying parse/IO error so the UI layer
                can surface a single readable message; the original exception
                is chained for debuggability.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")
            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary
        except Exception as e:
            # Chain the root cause instead of discarding it.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive plain-text summary of the dataset.

        Covers shape, per-column null counts, numeric statistics, categorical
        cardinality, and a head() preview. Safe on an empty DataFrame.
        """
        summary = []
        row_count = len(df)
        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")
        # Column information
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            # Guard against division by zero on an empty frame.
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")
        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")
        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                # Compute the mode once; it is empty when the column has no values.
                modes = df[col].mode()
                most_common = modes.iloc[0] if len(modes) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")
        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())
        return "\n".join(summary)
# Single shared analyzer instance used by all Gradio callbacks below.
analyzer = DataAnalyzer()
async def analyze_data(file, api_key, user_question=""):
    """Validate inputs, summarize the uploaded file, and request AI insights.

    Args:
        file: Gradio upload value — either a tempfile-like object exposing a
            ``.name`` path attribute, or a plain filepath string.
        api_key: Chutes API bearer token.
        user_question: Optional free-text question about the data.

    Returns:
        Tuple of (markdown report, raw data summary, HTML preview of the
        first 10 rows); on validation failure or error the last two are "".
    """
    if not file:
        return "Please upload a CSV or Excel file.", "", ""
    if not api_key:
        return "Please enter your Chutes API key.", "", ""
    try:
        # Newer Gradio versions may pass a plain filepath string instead of
        # a tempfile wrapper — accept both forms.
        file_path = file if isinstance(file, str) else file.name
        # Process the uploaded file
        df, data_summary = analyzer.process_file(file_path)
        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
        # Format the complete response
        response = f"""## 📊 Data Analysis Complete!
### 📈 Dataset Overview:
{data_summary}
### 🤖 AI Insights & Recommendations:
{ai_analysis}
"""
        return response, data_summary, df.head(10).to_html()
    except Exception as e:
        return f"Error: {str(e)}", "", ""
def sync_analyze_data(file, api_key, user_question=""):
    """Blocking bridge: run the async analyze_data pipeline to completion."""
    coroutine = analyze_data(file, api_key, user_question)
    return asyncio.run(coroutine)
# Create the Gradio interface.
# NOTE: the original file contained mis-encoded (mojibake) emoji in every
# user-facing string (e.g. "πŸ“Š" for 📊); they are restored to proper UTF-8 here.
with gr.Blocks(title="📊 Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
# 📊 Smart Data Analyzer
### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
""")
    with gr.Row():
        with gr.Column(scale=1):
            # File upload — restricted to the formats process_file supports.
            file_input = gr.File(
                label="📁 Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )
            # API key input, masked so the token is never shown on screen.
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )
            # Optional question input
            question_input = gr.Textbox(
                label="❓ Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )
            # Analyze button
            analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="📋 Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )
    # Additional outputs (hidden by default)
    with gr.Accordion("📊 Data Preview", open=False):
        data_preview = gr.HTML(label="First 10 Rows")
    with gr.Accordion("🔍 Raw Data Summary", open=False):
        raw_summary = gr.Textbox(label="Dataset Summary", lines=10)
    # Event handlers: the click runs the sync wrapper around the async pipeline.
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )
    # Example section
    gr.Markdown("""
### 💡 Tips for Best Results:
- **File Size**: Keep files under 10MB for fastest processing
- **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
- **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
- **Formats**: Supports CSV, XLSX, and XLS files
### 🎯 Example Questions to Ask:
- "What are the key trends in this sales data?"
- "Which products are underperforming?"
- "Are there any seasonal patterns?"
- "What recommendations do you have based on this data?"
""")
# Launch the application only when executed as a script.
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link alongside localhost.
    app.launch(share=True)