shukdevdattaEX committed on
Commit
d513747
·
verified ·
1 Parent(s): 5f8125b

Create v1.txt

Browse files
Files changed (1) hide show
  1. v1.txt +246 -0
v1.txt ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import aiohttp
4
+ import asyncio
5
+ import json
6
+ import io
7
+ import os
8
+ from typing import Optional, Tuple
9
+
10
class DataAnalyzer:
    """Summarise tabular files with pandas and ask the Chutes chat API about them."""

    def __init__(self):
        # Chat-completions endpoint that every analysis request is POSTed to.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    @staticmethod
    def _build_prompt(data_summary: str, user_question: Optional[str]) -> str:
        """Return the follow-up prompt if a question is given, else the general one."""
        if user_question:
            return (
                "Based on this dataset summary:\n"
                f"{data_summary}\n"
                f"User question: {user_question}\n"
                "Please provide a detailed answer based on the data."
            )
        return (
            "Analyze the following dataset and provide comprehensive insights:\n"
            f"{data_summary}\n"
            "Please provide:\n"
            "1. Key statistical insights\n"
            "2. Notable patterns or trends\n"
            "3. Data quality observations\n"
            "4. Business recommendations\n"
            "5. Potential areas for further analysis\n"
            "Keep the analysis clear, actionable, and data-driven."
        )

    @staticmethod
    def _delta_text(payload: str) -> str:
        """Return the text fragment carried by one streamed JSON payload, or ""."""
        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            return ""
        # Tolerate payloads that are valid JSON but not the expected object
        # shape instead of failing the whole request on one odd chunk.
        if not isinstance(chunk, dict):
            return ""
        choices = chunk.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first = choices[0]
        if not isinstance(first, dict):
            return ""
        delta = first.get("delta") or {}
        if not isinstance(delta, dict):
            return ""
        text = delta.get("content")
        return text if isinstance(text, str) else ""

    async def analyze_with_chutes(
        self,
        api_token: str,
        data_summary: str,
        user_question: Optional[str] = None,
    ) -> str:
        """Send the dataset summary to the Chutes API and return the model's text.

        Args:
            api_token: Bearer token placed in the ``Authorization`` header.
            data_summary: Text summary of the dataset embedded in the prompt.
            user_question: Optional follow-up question; when empty or ``None``
                the general analysis prompt is used instead.

        Returns:
            The concatenated streamed response, a fallback message when the
            stream carried no text, or an ``"Error..."`` string on a non-200
            status or any exception (this method does not raise).
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": self._build_prompt(data_summary, user_question),
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3,  # Lower temperature for more consistent analysis
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    pieces = []
                    async for raw_line in response.content:
                        line = raw_line.decode("utf-8").strip()
                        # Only "data:" lines carry payloads; the space after
                        # the colon is optional in server-sent events.
                        if not line.startswith("data:"):
                            continue
                        payload = line[5:].strip()
                        if payload == "[DONE]":
                            break
                        text = self._delta_text(payload)
                        if text:
                            pieces.append(text)

                    full_response = "".join(pieces)
                    return full_response if full_response else "No response received from the model."

        except Exception as e:
            # Callers display the returned string, so report instead of raising.
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load a CSV or Excel file and build its text summary.

        Args:
            file_path: Path to a ``.csv``, ``.xlsx`` or ``.xls`` file; the
                extension is matched case-insensitively.

        Returns:
            A ``(dataframe, summary)`` pair.

        Raises:
            Exception: On an unsupported extension or any read/summary failure;
                the message is prefixed with ``"Error processing file: "`` and
                the original exception is chained as ``__cause__``.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ('.xlsx', '.xls'):
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            return df, self.generate_data_summary(df)

        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Build a newline-joined text summary of ``df``.

        Sections, in order: overall shape, per-column dtype and null counts,
        mean/std/range for numeric columns, unique count and most common value
        for object/category columns, and a preview of the first five rows.
        """
        row_count, col_count = df.shape
        summary = [
            "Dataset Overview:",
            f"- Shape: {row_count} rows × {col_count} columns",
            f"- Total cells: {row_count * col_count:,}",
            "\nColumn Information:",
        ]

        for position, (col, dtype) in enumerate(df.dtypes.items()):
            # Positional access always yields one Series, even when two
            # columns share the same label.
            null_count = df.iloc[:, position].isnull().sum()
            # A zero-row frame would otherwise divide by zero and print "nan%".
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        numeric = df.select_dtypes(include=['number'])
        if numeric.shape[1] > 0:
            summary.append("\nNumerical Columns Summary:")
            for col, series in numeric.items():
                stats = series.describe()
                summary.append(
                    f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, "
                    f"Range=[{stats['min']:.2f}, {stats['max']:.2f}]"
                )

        categorical = df.select_dtypes(include=['object', 'category'])
        if categorical.shape[1] > 0:
            summary.append("\nCategorical Columns Summary:")
            for col, series in categorical.items():
                modes = series.mode()  # empty when the column has no non-null values
                most_common = modes.iloc[0] if len(modes) > 0 else "N/A"
                summary.append(
                    f"- {col}: {series.nunique()} unique values, Most common: '{most_common}'"
                )

        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)
136
+
137
# Initialize the analyzer: a single module-level DataAnalyzer instance.
# analyze_data() calls process_file() and analyze_with_chutes() on it.
analyzer = DataAnalyzer()
139
+
140
async def analyze_data(file, api_key, user_question=""):
    """Summarise the uploaded file, query the model, and format the results.

    Args:
        file: The uploaded file as a path string, an ``os.PathLike``, or an
            object whose ``.name`` attribute is the path on disk.
        api_key: Chutes API token; empty or whitespace-only counts as missing.
        user_question: Optional question; empty or whitespace-only requests
            the general analysis.

    Returns:
        A ``(markdown_report, raw_summary, preview_html)`` tuple. On a missing
        input or any failure the first element is a message and the other two
        are empty strings.
    """
    if not file:
        return "Please upload a CSV or Excel file.", "", ""

    # A whitespace-only key can never authenticate, so reject it up front
    # instead of spending a request on it.
    api_key = (api_key or "").strip()
    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    # Whitespace alone is not a question; fall back to the general prompt.
    user_question = (user_question or "").strip()

    try:
        # Accept both a plain path and a file-like wrapper exposing `.name`.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        # Process the uploaded file
        df, data_summary = analyzer.process_file(file_path)

        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)

        # Format the complete response
        response = (
            "## 📊 Data Analysis Complete!\n"
            "### 📈 Dataset Overview:\n"
            f"{data_summary}\n"
            "### 🤖 AI Insights & Recommendations:\n"
            f"{ai_analysis}\n"
        )

        return response, data_summary, df.head(10).to_html()

    except Exception as e:
        # UI boundary: show the failure in the results pane rather than raising.
        return f"Error: {str(e)}", "", ""
167
+
168
def sync_analyze_data(file, api_key, user_question=""):
    """Blocking entry point: run the ``analyze_data`` coroutine to completion.

    Takes the same arguments as ``analyze_data`` and returns whatever that
    coroutine returns.
    """
    pending = analyze_data(file, api_key, user_question)
    return asyncio.run(pending)
171
+
172
# Create the Gradio interface.
# Layout: inputs (file, API key, optional question, button) in the left column,
# results and two collapsed accordions in the wider right column.
# NOTE: the multi-line Markdown strings are indented with the code; this relies
# on gr.Markdown removing common leading indentation before rendering.
with gr.Blocks(title="📊 Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # 📊 Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # File upload
            file_input = gr.File(
                label="📁 Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input
            question_input = gr.Textbox(
                label="❓ Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")

        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="📋 Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

    # Additional outputs (hidden by default)
    with gr.Accordion("📊 Data Preview", open=False):
        data_preview = gr.HTML(label="First 10 Rows")

    with gr.Accordion("🔍 Raw Data Summary", open=False):
        raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers.
    # The handler returns (report, summary, preview_html), so the outputs are
    # listed in that same order.
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section
    gr.Markdown("""
    ### 💡 Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### 🎯 Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)

# Launch the application
if __name__ == "__main__":
    # share=True asks Gradio for a publicly reachable link in addition to the
    # local server; NOTE(review): confirm public exposure is intended.
    app.launch(
        share=True
    )