Create v1.txt
Browse files
v1.txt
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import aiohttp
|
4 |
+
import asyncio
|
5 |
+
import json
|
6 |
+
import io
|
7 |
+
import os
|
8 |
+
from typing import Optional, Tuple
|
9 |
+
|
10 |
+
class DataAnalyzer:
    """Summarize tabular data files and analyze them via the Chutes LLM API."""

    def __init__(self):
        # Chutes OpenAI-compatible chat-completions endpoint.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: Optional[str] = None) -> str:
        """Send data to Chutes API for analysis.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Plain-text dataset summary (see generate_data_summary).
            user_question: Optional follow-up question; when falsy, a generic
                "analyze this dataset" prompt is used instead.

        Returns:
            The model's full streamed text, or a human-readable error string.
            This method never raises; all failures are returned as text so the
            UI can display them directly.
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }

        # Create the prompt based on whether it's initial analysis or follow-up question
        if user_question:
            prompt = f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # Lower temperature for more consistent analysis
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    # Accumulate the server-sent-event stream ("data: {json}" lines).
                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if chunk_data.get("choices"):
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                # Skip keep-alive / partial lines that are not valid JSON.
                                continue

                    return full_response if full_response else "No response received from the model."

        except Exception as e:
            # Network / SSL / protocol failures are reported as text, not raised.
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load an uploaded CSV or Excel file.

        Args:
            file_path: Path to a .csv, .xlsx, or .xls file.

        Returns:
            Tuple of (dataframe, plain-text summary of the dataframe).

        Raises:
            Exception: wrapping any parse/IO error, with the original exception
                chained as the cause.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary

        except Exception as e:
            # Chain the cause so the root error survives in tracebacks.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive plain-text summary of *df*.

        Covers shape, per-column dtype/null stats, numeric describe() stats,
        categorical cardinality/mode, and a head() preview.
        """
        summary = []
        row_count = len(df)

        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information (dtype plus null count/percentage per column)
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            # Guard against division by zero for an empty dataframe.
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                mode = df[col].mode()
                most_common = mode.iloc[0] if len(mode) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)
|
136 |
+
|
137 |
+
# Initialize the analyzer: module-level singleton shared by every Gradio callback.
analyzer = DataAnalyzer()
|
139 |
+
|
140 |
+
async def analyze_data(file, api_key, user_question=""):
    """Validate inputs, summarize the uploaded file, and fetch AI insights.

    Returns a (markdown_report, raw_summary, html_preview) triple; when a
    required input is missing or processing fails, the first slot carries the
    message and the other two are empty strings.
    """
    # Guard clauses: both an uploaded file and an API key are required.
    if not file:
        return "Please upload a CSV or Excel file.", "", ""
    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    try:
        # Parse the upload and build its textual summary.
        frame, summary_text = analyzer.process_file(file.name)

        # Ask the LLM for insights (or an answer to the user's question).
        insights = await analyzer.analyze_with_chutes(api_key, summary_text, user_question)

        # Assemble the markdown report shown in the UI.
        report = f"""## π Data Analysis Complete!
### π Dataset Overview:
{summary_text}
### π€ AI Insights & Recommendations:
{insights}
"""
        preview_html = frame.head(10).to_html()
        return report, summary_text, preview_html

    except Exception as e:
        return f"Error: {str(e)}", "", ""
|
167 |
+
|
168 |
+
def sync_analyze_data(file, api_key, user_question=""):
    """Synchronous bridge so Gradio's click handler can drive the async pipeline."""
    coroutine = analyze_data(file, api_key, user_question)
    return asyncio.run(coroutine)
|
171 |
+
|
172 |
+
# Create the Gradio interface.
# NOTE(review): the original indentation was lost in transit — the accordions are
# placed inside the results column here; confirm against the intended layout.
with gr.Blocks(title="π Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # π Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        # Left column: user inputs (file, credentials, optional question).
        with gr.Column(scale=1):
            # File upload — restricted to the formats process_file understands.
            file_input = gr.File(
                label="π Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input — type="password" masks the token in the UI.
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input — empty string triggers the generic analysis prompt.
            question_input = gr.Textbox(
                label="β Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")

        # Right column: analysis results.
        with gr.Column(scale=2):
            # Results display — receives the markdown report from analyze_data.
            analysis_output = gr.Markdown(
                label="π Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

            # Additional outputs (hidden by default)
            with gr.Accordion("π Data Preview", open=False):
                data_preview = gr.HTML(label="First 10 Rows")

            with gr.Accordion("π Raw Data Summary", open=False):
                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers — sync_analyze_data returns (report, summary, preview_html),
    # matching the three outputs in order.
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section
    gr.Markdown("""
    ### π‘ Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### π― Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)
|
241 |
+
|
242 |
+
# Launch the application
if __name__ == "__main__":
    app.launch(
        # NOTE(review): share=True publishes a temporary public Gradio URL —
        # confirm this exposure is intended for this deployment.
        share=True
    )
|