|
import gradio as gr |
|
import pandas as pd |
|
import aiohttp |
|
import asyncio |
|
import json |
|
import os |
|
import numpy as np |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from typing import Optional, Tuple, Dict, Any |
|
import logging |
|
from datetime import datetime |
|
import re |
|
from jinja2 import Template |
|
import markdown # Requires 'markdown' package: install via `pip install markdown` |
|
|
|
# Configure logging: INFO level for the whole process (Gradio callbacks included).
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the analyzer class and all UI callbacks.
logger = logging.getLogger(__name__)
|
|
|
class EnhancedDataAnalyzer:
    """Core engine behind the Gradio app.

    Responsibilities:
    - validate API keys and uploaded files,
    - load CSV/Excel files into pandas,
    - build a markdown data summary and Plotly visualizations,
    - stream an AI analysis from the Chutes chat-completions API,
    - render a standalone HTML report.
    """

    def __init__(self):
        # Chutes OpenAI-compatible chat-completions endpoint.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
        self.max_file_size = 50 * 1024 * 1024  # 50MB upload cap
        self.conversation_history = []  # reserved for future multi-turn chat
        self.current_df: Optional[pd.DataFrame] = None  # last loaded dataset
        self.current_charts = None  # cached chart HTML fragments for report export

    def validate_api_key(self, api_key: str) -> bool:
        """Cheap sanity check: key is non-empty and longer than 10 chars after stripping."""
        return bool(api_key and len(api_key.strip()) > 10)

    def validate_file(self, file) -> Tuple[bool, str]:
        """Validate an uploaded file object (existence, size, extension).

        Args:
            file: Gradio file wrapper exposing a ``.name`` path, or None.

        Returns:
            ``(is_valid, message)`` tuple.
        """
        if not file:
            return False, "No file uploaded"

        try:
            file_size = os.path.getsize(file.name)
        except OSError:
            # The temp file may have disappeared between upload and analysis;
            # report a validation failure instead of crashing the callback.
            return False, "Uploaded file could not be read"
        if file_size > self.max_file_size:
            return False, f"File too large. Maximum size: {self.max_file_size // (1024*1024)}MB"

        file_extension = os.path.splitext(file.name)[1].lower()
        if file_extension not in ['.csv', '.xlsx', '.xls']:
            return False, "Unsupported format. Please upload CSV or Excel files only."

        return True, "File valid"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
        """Call the Chutes streaming chat API and return the assembled text.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Markdown summary of the dataset, used as prompt context.
            user_question: Optional targeted question; when omitted a full
                analysis prompt is used instead.

        Returns:
            The model's response text, or a user-facing error string —
            this method never raises; all failures map to messages.
        """
        headers = {
            "Authorization": f"Bearer {api_token.strip()}",
            "Content-Type": "application/json"
        }

        # Build a context-aware prompt: targeted Q&A vs. full report.
        if user_question:
            prompt = f"""You are a data analyst expert. Based on this dataset:

{data_summary}

User's specific question: {user_question}

Provide a detailed, actionable answer with specific data points and recommendations."""
        else:
            prompt = f"""You are a senior data analyst. Analyze this dataset thoroughly:

{data_summary}

Provide a comprehensive analysis including:

1. **Key Statistical Insights**: Most important numbers and what they mean

2. **Patterns & Trends**: Notable patterns, correlations, or anomalies

3. **Data Quality Assessment**: Missing values, outliers, data consistency

4. **Business Intelligence**: Actionable insights and opportunities

5. **Recommendations**: Specific next steps or areas to investigate

Format your response with clear sections and bullet points for readability."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are an expert data analyst who provides clear, actionable insights from datasets. Always structure your responses with clear headings and specific data points."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,       # consume server-sent events incrementally
            "max_tokens": 3000,
            "temperature": 0.2,   # low temperature: factual, reproducible analysis
            "top_p": 0.9
        }

        try:
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    # Map common HTTP failures to friendly messages.
                    if response.status == 401:
                        return "β **Authentication Error**: Invalid API key. Please check your Chutes API token."
                    elif response.status == 429:
                        return "β³ **Rate Limit**: Too many requests. Please wait a moment and try again."
                    elif response.status != 200:
                        return f"β **API Error**: Request failed with status {response.status}"

                    # Accumulate the SSE stream: lines look like "data: {json}".
                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                # Ignore keep-alive / partial chunks.
                                continue

                    return full_response if full_response else "β οΈ No response received from the model."

        except asyncio.TimeoutError:
            return "β° **Timeout Error**: Request took too long. Please try again."
        except Exception as e:
            logger.error(f"API Error: {str(e)}")
            return f"β **Connection Error**: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
        """Load a CSV/Excel file and derive the summary and charts.

        Returns:
            ``(dataframe, markdown_summary, charts_html)``

        Raises:
            ValueError: wrapping any parsing/processing failure (subclass of
                Exception, so existing callers that catch Exception still work).
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                # Try a few common encodings; the for-else fires when none worked.
                for encoding in ['utf-8', 'latin-1', 'cp1252']:
                    try:
                        df = pd.read_csv(file_path, encoding=encoding)
                        break
                    except UnicodeDecodeError:
                        continue
                else:
                    raise ValueError("Could not decode CSV file. Please check file encoding.")
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Normalize headers: trim and collapse internal whitespace.
            df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
            self.current_df = df
            data_summary = self.generate_enhanced_summary(df)
            charts_html = self.generate_visualizations(df)

            return df, data_summary, charts_html

        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise ValueError(f"Error processing file: {str(e)}") from e

    def generate_enhanced_summary(self, df: pd.DataFrame) -> str:
        """Build a markdown report: shape, dtypes, missing data, numeric and
        categorical profiles, and a small sample of rows."""
        summary = []
        summary.append(f"# π Dataset Analysis Report")
        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        summary.append(f"**File Size**: {df.shape[0]:,} rows Γ {df.shape[1]} columns")
        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB\n")

        # Column dtype breakdown.
        type_counts = df.dtypes.value_counts()
        summary.append("## π Column Types:")
        for dtype, count in type_counts.items():
            summary.append(f"- **{dtype}**: {count} columns")

        # Missing-data report, worst 10 columns first.
        missing_data = df.isnull().sum()
        missing_pct = (missing_data / len(df) * 100).round(2)
        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)

        if len(missing_summary) > 0:
            summary.append("\n## β οΈ Missing Data:")
            for col, count in missing_summary.head(10).items():
                pct = missing_pct[col]
                summary.append(f"- **{col}**: {count:,} missing ({pct}%)")
        else:
            summary.append("\n## β Data Quality: No missing values detected!")

        # Numeric profile: mean, std, and IQR-fence outlier count.
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            summary.append(f"\n## π Numerical Columns Analysis ({len(numeric_cols)} columns):")
            for col in numeric_cols[:10]:
                stats = df[col].describe()
                # Count outliers on BOTH tails of the 1.5*IQR fence
                # (the previous version only counted the upper tail).
                iqr = stats['75%'] - stats['25%']
                lower_fence = stats['25%'] - 1.5 * iqr
                upper_fence = stats['75%'] + 1.5 * iqr
                outliers = int(((df[col] < lower_fence) | (df[col] > upper_fence)).sum())
                summary.append(f"- **{col}**: ΞΌ={stats['mean']:.2f}, Ο={stats['std']:.2f}, outliers={outliers}")

        # Categorical profile: cardinality bucket and most common value.
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append(f"\n## π Categorical Columns Analysis ({len(categorical_cols)} columns):")
            for col in categorical_cols[:10]:
                unique_count = df[col].nunique()
                cardinality = "High" if unique_count > len(df) * 0.9 else "Medium" if unique_count > 10 else "Low"
                most_common = df[col].mode().iloc[0] if len(df[col].mode()) > 0 else "N/A"
                summary.append(f"- **{col}**: {unique_count:,} unique values ({cardinality} cardinality), Top: '{most_common}'")

        # Small row sample; enumerate so non-integer indexes still render
        # (the previous `idx + 1` assumed a RangeIndex).
        summary.append("\n## π Data Sample (First 3 Rows):")
        sample_df = df.head(3)
        for row_num, (_, row) in enumerate(sample_df.iterrows(), start=1):
            summary.append(f"\n**Row {row_num}:**")
            for col, val in row.items():
                summary.append(f" - {col}: {val}")

        return "\n".join(summary)

    def generate_visualizations(self, df: pd.DataFrame) -> str:
        """Build a set of Plotly charts as HTML fragments.

        Produces (when applicable): missing-data bars, a correlation heatmap,
        up to 3 numeric distributions, up to 2 categorical bar charts, and a
        dataset-overview bar chart. Chart HTML is cached on the instance for
        later report export. Returns a fallback <p> on failure.
        """
        charts_html = []

        try:
            # 1) Missing-data overview (only if anything is missing).
            missing_data = df.isnull().sum()
            if missing_data.sum() > 0:
                fig = px.bar(
                    x=missing_data.index,
                    y=missing_data.values,
                    title="π Missing Data Analysis",
                    labels={'x': 'Columns', 'y': 'Missing Values Count'},
                    color=missing_data.values,
                    color_continuous_scale='Reds'
                )
                fig.update_layout(
                    height=400,
                    showlegend=False,
                    title_x=0.5,
                    xaxis_tickangle=-45
                )
                charts_html.append(f"<h3>π Data Quality Overview</h3>")
                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="missing_data_chart"))

            # 2) Correlation heatmap (needs at least 2 numeric columns).
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(
                    corr_matrix,
                    title="π Correlation Matrix - Numerical Variables",
                    color_continuous_scale='RdBu_r',
                    aspect="auto",
                    text_auto=True
                )
                fig.update_layout(height=500, title_x=0.5)
                charts_html.append(f"<h3>π Correlation Analysis</h3>")
                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="correlation_chart"))

            # 3) Distributions for the first few numeric columns.
            if len(numeric_cols) > 0:
                for i, col in enumerate(numeric_cols[:3]):
                    fig = px.histogram(
                        df,
                        x=col,
                        title=f"π Distribution: {col}",
                        marginal="box",
                        nbins=30
                    )
                    fig.update_layout(height=400, title_x=0.5)
                    if i == 0:
                        # Section heading only once, before the first histogram.
                        charts_html.append(f"<h3>π Data Distributions</h3>")
                    charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"dist_chart_{i}"))

            # 4) Top-value bars for low-cardinality categorical columns.
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            if len(categorical_cols) > 0:
                for i, col in enumerate(categorical_cols[:2]):
                    if df[col].nunique() <= 20:
                        value_counts = df[col].value_counts().head(10)
                        fig = px.bar(
                            x=value_counts.values,
                            y=value_counts.index,
                            orientation='h',
                            title=f"π Top 10 Values: {col}",
                            labels={'x': 'Count', 'y': col}
                        )
                        fig.update_layout(height=400, title_x=0.5)
                        if i == 0:
                            charts_html.append(f"<h3>π Categorical Data Analysis</h3>")
                        charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"cat_chart_{i}"))

            # 5) One-glance dataset overview.
            summary_data = {
                'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns', 'Categorical Columns', 'Missing Values'],
                'Count': [
                    len(df),
                    len(df.columns),
                    len(numeric_cols),
                    len(categorical_cols),
                    df.isnull().sum().sum()
                ]
            }

            fig = px.bar(
                summary_data,
                x='Metric',
                y='Count',
                title="π Dataset Overview",
                color='Count',
                color_continuous_scale='Blues'
            )
            fig.update_layout(height=400, title_x=0.5, showlegend=False)
            charts_html.append(f"<h3>π Dataset Overview</h3>")
            charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="overview_chart"))

            # Cache for report export (generate_report_html reuses these).
            self.current_charts = charts_html
            return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"

        except Exception as e:
            logger.error(f"Chart generation error: {str(e)}")
            return f"<p>β Chart generation failed: {str(e)}</p>"

    def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
        """Render the analysis, cached charts and raw summary into a
        standalone HTML report with a print-to-PDF button.

        Args:
            analysis_text: Markdown analysis text (converted to HTML).
            data_summary: Raw technical summary, rendered in a <pre> block.
            file_name: Original upload name shown in the report metadata.
        """
        html_template = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Data Analysis Report</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: #f8f9fa;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
            text-align: center;
        }
        .section {
            background: white;
            padding: 25px;
            margin-bottom: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .chart-container {
            margin: 20px 0;
            padding: 15px;
            background: #f8f9ff;
            border-radius: 8px;
            border-left: 4px solid #667eea;
        }
        h1, h2, h3 {
            color: #2c3e50;
            margin-top: 20px;
            margin-bottom: 15px;
        }
        .metadata {
            background: #e8f4f8;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        .footer {
            text-align: center;
            color: #666;
            margin-top: 40px;
            padding: 20px;
            background: #f1f1f1;
            border-radius: 5px;
        }
        pre {
            background: #f4f4f4;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
            white-space: pre-wrap;
            font-size: 14px;
        }
        strong {
            color: #2c3e50;
            font-weight: 600;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        th {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        .print-button {
            background: #667eea;
            color: white;
            padding: 10px 20px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
            margin: 10px 0;
            display: inline-block;
        }
        .print-button:hover {
            background: #764ba2;
        }
        @media print {
            .print-button {
                display: none;
            }
            body {
                background: white;
            }
            .section, .metadata, .footer {
                box-shadow: none;
            }
        }
    </style>
    <script>
        function printReport() {
            window.print();
        }
    </script>
</head>
<body>
    <div class="header">
        <h1>π Smart Data Analysis Report</h1>
        <p>Comprehensive AI-Powered Data Insights</p>
    </div>

    <div class="metadata">
        <strong>π File:</strong> {{ file_name }}<br>
        <strong>π Generated:</strong> {{ timestamp }}<br>
        <strong>π€ Model:</strong> OpenAI gpt-oss-20b
    </div>

    <div class="section">
        <h2>π― AI Analysis & Insights</h2>
        <button class="print-button" onclick="printReport()">π¨οΈ Print as PDF</button>
        <div>{{ ai_analysis }}</div>
    </div>

    <div class="section">
        <h2>π Visualizations</h2>
        <div class="chart-container">
            {{ charts_html }}
        </div>
    </div>

    <div class="section">
        <h2>π Technical Data Summary</h2>
        <pre>{{ data_summary }}</pre>
    </div>

    <div class="footer">
        <p>Report generated by Smart Data Analyzer Pro β’ Powered by Smart AI</p>
        <p>For questions or support, contact +8801719296601 (via Whatsapp)</p>
    </div>
</body>
</html>
"""

        template = Template(html_template)
        # Convert the markdown analysis to HTML ('extra' enables tables etc.).
        ai_analysis_html = markdown.markdown(analysis_text, extensions=['extra', 'tables'])
        charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"

        return template.render(
            file_name=file_name,
            timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            ai_analysis=ai_analysis_html,
            charts_html=charts_content,
            data_summary=data_summary
        )
|
|
|
# Single module-level analyzer shared by all Gradio callbacks; it also
# caches the most recently loaded dataset and generated charts.
analyzer = EnhancedDataAnalyzer()
|
|
|
async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    """Run the full pipeline: validate inputs, load the file, build the
    summary/preview/charts, and fetch the AI analysis.

    Returns a 5-tuple: (analysis_markdown, raw_summary, preview_html,
    charts_html, file_name). Error paths fill the trailing slots with
    empty strings and None.
    """
    # --- input guards: each failure short-circuits with a full 5-tuple ---
    if not file:
        return "β Please upload a CSV or Excel file.", "", "", "", None

    if not analyzer.validate_api_key(api_key):
        return "β Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None

    is_valid, validation_msg = analyzer.validate_file(file)
    if not is_valid:
        return f"β {validation_msg}", "", "", "", None

    progress(0.1, desc="π Reading file...")
    try:
        # Load + summarize + chart in one step.
        frame, summary_text, charts_markup = analyzer.process_file(file.name)
        progress(0.3, desc="π Processing data...")
        progress(0.5, desc="π€ Generating AI insights...")
        insights = await analyzer.analyze_with_chutes(api_key, summary_text, user_question)
        progress(0.9, desc="β¨ Finalizing results...")

        finished_at = datetime.now().strftime('%H:%M:%S')
        result_md = f"""# π― Analysis Complete!

{insights}

---

*Analysis powered by OpenAI gpt-oss-20b via Chutes β’ Generated at {finished_at}*
"""
        # Render the first 15 rows as a styled HTML table for the preview tab.
        preview_table = frame.head(15).to_html(
            classes="table table-striped table-hover",
            table_id="data-preview-table",
            escape=False
        )
        preview_block = f"""
<style>
#data-preview-table {{
    width: 100%;
    border-collapse: collapse;
    margin: 20px 0;
    font-size: 14px;
}}
#data-preview-table th {{
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 12px 8px;
    text-align: left;
    font-weight: bold;
}}
#data-preview-table td {{
    padding: 10px 8px;
    border-bottom: 1px solid #ddd;
}}
#data-preview-table tr:hover {{
    background-color: #f5f5f5;
}}
</style>
{preview_table}
"""

        progress(1.0, desc="β Done!")
        return result_md, summary_text, preview_block, charts_markup, file.name

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return f"β **Error**: {str(e)}", "", "", "", None
|
|
|
def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    """Bridge Gradio's synchronous callback interface to the async pipeline."""
    coroutine = analyze_data(file, api_key, user_question, progress)
    return asyncio.run(coroutine)
|
|
|
def clear_all():
    """Drop the cached dataset/charts and blank all eight bound components:
    file, api key, question, analysis, answer, preview, summary, file-name state."""
    analyzer.current_df = analyzer.current_charts = None
    return (None,) + ("",) * 6 + (None,)
|
|
|
def download_report(analysis_text, data_summary, file_name, format_choice):
    """Write the analysis to disk as an HTML or Markdown report.

    Args:
        analysis_text: AI analysis markdown; empty means nothing to export.
        data_summary: Raw technical summary text.
        file_name: Original upload name, used to derive the report filename.
        format_choice: "HTML" for the full report; anything else → Markdown.

    Returns:
        ``(file_path_or_None, status_message)``
    """
    if not analysis_text:
        return None, "β No analysis data available for download."

    # Timestamped filename derived from the upload's base name.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis"

    try:
        if format_choice == "HTML":
            html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
            filename = f"{file_base_name}_analysis_report_{timestamp}.html"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_content)
            # Report the actual file name (previously a lost placeholder).
            return filename, f"β HTML report generated successfully! File: {filename}"

        else:  # Markdown
            report = f"""# Data Analysis Report

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

File: {file_name}

## AI Analysis:

{analysis_text}

## Raw Data Summary:

{data_summary}
"""
            filename = f"{file_base_name}_analysis_report_{timestamp}.md"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(report)
            return filename, f"β Markdown report generated successfully! File: {filename}"

    except Exception as e:
        logger.error(f"Report generation error: {str(e)}")
        return None, f"β Error generating report: {str(e)}"
|
|
|
# --- UI definition: layout, callbacks, and event wiring ---
with gr.Blocks(
    title="π Smart Data Analyzer Pro",
    theme=gr.themes.Ocean(),
    css="""
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .tab-nav {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    .upload-area {
        border: 2px dashed #667eea;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        background: #f8f9ff;
    }
    """
) as app:
    # Carries the uploaded file's path from analysis to report export.
    current_file_name = gr.State("")

    gr.Markdown("""
    # π Smart Data Analyzer Pro
    ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b

    Upload your data files and get instant professional insights and downloadable reports!
    """)

    with gr.Row():
        # Left column: configuration + upload + quick stats.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Configuration")
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="sk-chutes-your-api-key-here...",
                type="password",
                lines=1,
                info="Get your free API key from chutes.ai"
            )
            file_input = gr.File(
                label="π Upload Data File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single",
                elem_classes=["upload-area"]
            )
            with gr.Row():
                analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")
                clear_btn = gr.Button("ποΈ Clear All", variant="secondary")
            with gr.Group():
                gr.Markdown("### π Quick Stats")
                file_stats = gr.Textbox(
                    label="File Information",
                    lines=3,
                    interactive=False,
                    placeholder="Upload a file to see statistics..."
                )

        # Right column: main analysis output.
        with gr.Column(scale=2):
            gr.Markdown("### π― Analysis Results")
            analysis_output = gr.Markdown(
                value="π **Ready to analyze your data!**\n\nUpload a CSV or Excel file and click 'Analyze Data' to get started.",
                show_label=False
            )

    with gr.Tabs():
        with gr.Tab("π¬ Ask Questions"):
            question_input = gr.Textbox(
                label="β Ask Specific Questions About Your Data",
                placeholder="Examples:\nβ’ What are the top 5 customers by revenue?\nβ’ Are there any seasonal trends?\nβ’ Which products have the highest margins?\nβ’ What anomalies do you see in this data?",
                lines=3
            )
            ask_btn = gr.Button("π Get Answer", variant="primary")
            question_output = gr.Markdown()

        with gr.Tab("π Data Preview"):
            data_preview = gr.HTML(
                label="Dataset Preview",
                value="<p>Upload a file to see data preview...</p>"
            )

        with gr.Tab("π Raw Summary"):
            raw_summary = gr.Textbox(
                label="Detailed Data Summary",
                lines=15,
                max_lines=20,
                show_copy_button=True
            )

        with gr.Tab("πΎ Export Reports"):
            gr.Markdown("### π₯ Download Your Analysis Report")
            with gr.Row():
                format_choice = gr.Radio(
                    choices=["HTML", "Markdown"],
                    value="HTML",
                    label="π Report Format",
                    info="Choose your preferred download format"
                )
            download_btn = gr.Button("π₯ Generate & Download Report", variant="primary", size="lg")
            download_status = gr.Textbox(label="Download Status", interactive=False)
            download_file = gr.File(label="π Download Link", visible=True)

    def update_file_stats(file):
        """Show name/size/upload-time for the selected file in Quick Stats."""
        if not file:
            return "No file uploaded"
        try:
            file_size = os.path.getsize(file.name) / (1024 * 1024)
            file_name = os.path.basename(file.name)
            return f"π **File**: {file_name}\nπ **Size**: {file_size:.2f} MB\nβ° **Uploaded**: {datetime.now().strftime('%H:%M:%S')}"
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit propagate.
            return "File information unavailable"

    def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
        """'Analyze Data' handler: adapt the pipeline's 5-tuple to the 4 bound outputs
        (the preview HTML slot at index 3 is not wired here)."""
        result = sync_analyze_data(file, api_key, user_question, progress)
        if len(result) == 5:
            return result[0], result[1], result[2], result[4]
        else:
            return result[0], result[1], result[2], ""

    def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
        """'Get Answer' handler: require a question, return only the analysis text."""
        if not question.strip():
            return "β Please enter a specific question about your data."
        result = sync_analyze_data(file, api_key, question, progress)
        return result[0]

    # Hidden empty textbox keeps handle_analysis' signature aligned
    # (full analysis passes no user question).
    analyze_btn.click(
        fn=handle_analysis,
        inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
        outputs=[analysis_output, raw_summary, data_preview, current_file_name],
        show_progress=True
    )

    ask_btn.click(
        fn=handle_question_analysis,
        inputs=[file_input, api_key_input, question_input],
        outputs=[question_output],
        show_progress=True
    )

    file_input.change(
        fn=update_file_stats,
        inputs=[file_input],
        outputs=[file_stats]
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[file_input, api_key_input, question_input, analysis_output,
                 question_output, data_preview, raw_summary, current_file_name]
    )

    download_btn.click(
        fn=download_report,
        inputs=[analysis_output, raw_summary, current_file_name, format_choice],
        outputs=[download_file, download_status]
    )

    gr.Markdown("""
    ---
    ### π‘ Pro Tips for Better Analysis:

    **π― For Best Results:**
    - Clean your data before upload (remove extra headers, format dates consistently)
    - Use descriptive column names
    - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"

    **π₯ Export Options:**
    - **HTML**: Interactive report with embedded charts and print-to-PDF option
    - **Markdown**: Simple text format for documentation

    **β‘ Speed Optimization:**
    - Files under 10MB process fastest
    - CSV files typically load faster than Excel
    - Limit to essential columns for quicker analysis

    **π§ Supported Formats:** CSV, XLSX, XLS | **π Max Size:** 50MB | **π Response Time:** ~3-5 seconds
    """)
|
|
|
if __name__ == "__main__":
    # Bound the request queue so long-running analyses don't pile up.
    app.queue(max_size=10)
    app.launch()