srivatsavdamaraju committed on
Commit
a1a7312
·
verified ·
1 Parent(s): 1cfe2c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +336 -0
app.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import os
4
+ import tempfile
5
+ import matplotlib.pyplot as plt
6
+ from pandasai import SmartDataframe
7
+ from langchain_groq.chat_models import ChatGroq
8
+ from dotenv import load_dotenv
9
+ import io
10
+ import base64
11
+ import re
12
+
13
# Load variables from a local .env file (if one exists) into the process
# environment before any configuration value is read.
load_dotenv()

# SECURITY: API keys must never be committed to source control. The key is
# read from the GROQ_API_KEY environment variable, which load_dotenv() above
# may have populated from a .env file. Any key that was previously hardcoded
# in this file should be treated as leaked and revoked.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Module-level state shared between the Gradio callbacks.
current_dataframe = None   # DataFrame read from the uploaded CSV
current_smart_df = None    # SmartDataframe built on top of current_dataframe
last_query_result = None   # value returned by the most recent chat() call
23
+
24
# Words that mean the user is explicitly asking for a visualization.
_CHART_KEYWORDS = (
    'plot', 'chart', 'graph', 'visualize', 'visualization', 'bar', 'line',
    'pie', 'scatter', 'histogram', 'heatmap', 'boxplot', 'distribution',
)

# Words that hint at a statistical question which could be visualized.
_STAT_KEYWORDS = (
    'top', 'bottom', 'highest', 'lowest', 'compare', 'comparison',
    'trend', 'relationship', 'correlation', 'by category', 'group by',
)


def _compile_keyword_pattern(keywords):
    """Build a regex that matches any of *keywords* as a whole word.

    A plain substring test treats "online" as 'line', "Barcelona" as 'bar'
    and "stop" as 'top'. The pattern therefore requires word boundaries, while
    still accepting common inflections ("plots", "plotting", "compared") and
    compounds made only of keywords ("scatterplot", "barchart").
    """
    alternatives = '|'.join(re.escape(keyword) for keyword in keywords)
    return re.compile(
        rf'\b(?:{alternatives})+(?:s|es|d|ed|ing|ted|ting)?\b'
    )


_CHART_PATTERN = _compile_keyword_pattern(_CHART_KEYWORDS)
_STAT_PATTERN = _compile_keyword_pattern(_STAT_KEYWORDS)
_RANKING_PATTERN = _compile_keyword_pattern(('top', 'bottom'))
_RELATION_PATTERN = _compile_keyword_pattern(('relationship', 'correlation'))


def analyze_chart_feasibility(query, df_data):
    """
    Analyze if the query can generate a meaningful chart.

    Args:
        query: Natural-language question typed by the user.
        df_data: pandas DataFrame the question refers to. It is only
            inspected (for numeric columns) when the query does not
            explicitly ask for a chart.

    Returns:
        A ``(can_create_chart, reasoning, chart_recommendation)`` tuple:
        a bool followed by two human-readable strings.
    """
    query_lower = query.lower()

    # An explicit request for a chart always wins. 'distribution' is one of
    # the explicit chart keywords, so no separate distribution branch is
    # needed further down.
    if _CHART_PATTERN.search(query_lower):
        return (
            True,
            "Query explicitly requests a chart/visualization.",
            "Chart will be generated as requested.",
        )

    numeric_columns = df_data.select_dtypes(include=['number']).columns.tolist()

    # A statistical question can only be charted if there is numeric data.
    if _STAT_PATTERN.search(query_lower) and numeric_columns:
        reasoning = f"Query involves statistical analysis with {len(numeric_columns)} numeric columns available for visualization."

        # Suggest an appropriate chart type.
        if _RANKING_PATTERN.search(query_lower):
            chart_recommendation = "Recommended: Bar chart to show rankings/comparisons."
        elif _RELATION_PATTERN.search(query_lower):
            chart_recommendation = "Recommended: Scatter plot to show relationships."
        else:
            chart_recommendation = "Recommended: Bar chart or line chart based on data nature."
        return True, reasoning, chart_recommendation

    return (
        False,
        "Query appears to be asking for specific values, calculations, or text-based information that doesn't require visualization.",
        "Chart generation not recommended for this type of query.",
    )
78
+
79
# Path of the CSV currently held in ``current_dataframe``. It lets
# process_query_only notice when the user uploads a different file.
_loaded_csv_path = None


def process_query_only(file, query):
    """
    Process the query without generating charts.

    Args:
        file: Upload handed over by the Gradio File component: either an
            object exposing the path as ``.name`` or a plain path string.
            ``None`` means nothing was uploaded.
        query: Natural-language question about the data.

    Returns:
        A ``(result_text, reasoning, recommendation)`` tuple of strings.
        On a validation failure or an exception, ``result_text`` carries
        the message and the other two strings are empty.
    """
    global current_dataframe, current_smart_df, last_query_result
    global _loaded_csv_path

    try:
        # Validate inputs
        if file is None:
            return "Please upload a CSV file.", "", ""

        if not query or not query.strip():
            return "Please enter a query.", "", ""

        # (Re)read the CSV when nothing is loaded yet or when the upload
        # points at a different file than the one currently in memory.
        csv_path = getattr(file, "name", file)
        if current_dataframe is None or csv_path != _loaded_csv_path:
            current_dataframe = pd.read_csv(csv_path)
            _loaded_csv_path = csv_path

        # Initialize Groq LLM
        llm = ChatGroq(
            model_name="mistral-saba-24b",
            api_key=GROQ_API_KEY,
            temperature=0,
        )

        # Create SmartDataframe; chart saving is disabled in query-only mode.
        current_smart_df = SmartDataframe(current_dataframe, config={
            "llm": llm,
            "save_charts": False,
            "enable_cache": False,
        })

        # Analyze chart feasibility
        can_chart, reasoning, recommendation = analyze_chart_feasibility(
            query, current_dataframe
        )

        # Process the query
        result = current_smart_df.chat(query)
        last_query_result = result

        if result is None:
            return "No result returned. Please try a different query.", reasoning, recommendation

        # Format the text result: tables are rendered in full, bare numbers
        # stay on one line, anything else is stringified below the header.
        if isinstance(result, pd.DataFrame):
            result_text = f"Query Result:\n\n{result.to_string()}"
        elif isinstance(result, (int, float)):
            result_text = f"Query Result: {result}"
        else:
            result_text = f"Query Result:\n{result}"

        return result_text, reasoning, recommendation

    except Exception as e:
        # Top-level UI boundary: report the failure in the result box
        # instead of letting the callback crash.
        return f"Error processing query: {str(e)}", "", ""
137
+
138
# Charts are written to a dedicated sub-directory of the system temp
# directory so that cleanup never touches images owned by other programs.
_CHART_DIR_NAME = "pandasai_charts"
_IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg')


def _chart_directory():
    """Return the chart output directory, creating it if necessary."""
    directory = os.path.join(tempfile.gettempdir(), _CHART_DIR_NAME)
    os.makedirs(directory, exist_ok=True)
    return directory


def _list_chart_files(directory):
    """Return the full paths of all image files directly inside *directory*."""
    return [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(_IMAGE_SUFFIXES)
    ]


def generate_chart(query):
    """
    Generate chart based on the query and the currently loaded data.

    Args:
        query: Natural-language description of what to visualize. If it
            contains none of 'plot', 'chart', 'graph' or 'visualize', an
            explicit charting instruction is prepended before it is sent
            to the model.

    Returns:
        A ``(status_message, chart_path)`` tuple. ``chart_path`` is the
        path of the generated image, or ``None`` when no chart was
        produced or an error occurred.
    """
    try:
        if current_smart_df is None or current_dataframe is None:
            return "Please run a query first before generating charts.", None

        if not query or not query.strip():
            return "Please enter a query for chart generation.", None

        # Remove charts left over from earlier runs (best effort).
        chart_dir = _chart_directory()
        for stale_chart in _list_chart_files(chart_dir):
            try:
                os.remove(stale_chart)
            except OSError:
                # A file that cannot be removed must not block generation.
                pass

        # Create a chart-focused version of the query
        chart_query = query
        if not any(keyword in query.lower() for keyword in ['plot', 'chart', 'graph', 'visualize']):
            chart_query = f"Create a chart or visualization for: {query}"

        # Build a separate SmartDataframe configured to save charts to disk.
        llm = ChatGroq(
            model_name="mistral-saba-24b",
            api_key=GROQ_API_KEY,
            temperature=0,
        )

        chart_smart_df = SmartDataframe(current_dataframe, config={
            "llm": llm,
            "save_charts": True,
            "save_charts_path": chart_dir,
            "open_charts": False,
            "enable_cache": False,
        })

        # Generate chart
        result = chart_smart_df.chat(chart_query)

        # NOTE(review): chat() presumably returns the saved image path for
        # plot answers -- confirm against the installed pandasai version.
        # The isfile() check keeps this safe if it returns something else.
        if (
            isinstance(result, str)
            and result.lower().endswith(_IMAGE_SUFFIXES)
            and os.path.isfile(result)
        ):
            return "Chart generated successfully!", result

        # Otherwise fall back to the newest image in the chart directory.
        generated = _list_chart_files(chart_dir)
        if generated:
            newest_chart = max(generated, key=os.path.getmtime)
            return "Chart generated successfully!", newest_chart

        return "Chart could not be generated. The query might not be suitable for visualization or there might be an issue with the data.", None

    except Exception as e:
        # Top-level UI boundary: surface the failure as a status message.
        return f"Error generating chart: {str(e)}", None
198
+
199
def reset_data():
    """
    Reset the loaded data to allow new file upload.

    Clears the module-level DataFrame, SmartDataframe and last query result.

    Returns:
        A 5-tuple with one value per output component wired to the reset
        button: a status message, two empty strings and two ``None`` values.
    """
    global current_dataframe, current_smart_df, last_query_result
    current_dataframe = None
    current_smart_df = None
    last_query_result = None
    return "Data reset. Please upload a new file.", "", "", None, None
208
+
209
def create_interface():
    """
    Create the Gradio interface.

    Builds the layout (inputs on the left, outputs on the right), adds the
    help text, and wires the buttons and the Enter key to
    ``process_query_only``, ``generate_chart`` and ``reset_data``.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks(title="Enhanced PandasAI with Groq", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 📊 Enhanced PandasAI Data Analysis with Groq

            Upload a CSV file and analyze your data with separate query and chart generation capabilities.

            **Instructions:**
            1. Upload your CSV file
            2. Enter your query and click "Analyze Query" to get text results and chart feasibility analysis
            3. If chart is recommended, click "Generate Chart" to create visualizations
            4. Use "Reset Data" to load a new file
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                # Input components
                file_input = gr.File(
                    label="Upload CSV File",
                    file_types=[".csv"],
                )

                query_input = gr.Textbox(
                    label="Your Query",
                    placeholder="e.g., 'Which are the top 5 countries by population?' or 'Show relationship between two columns'",
                    lines=3,
                )

                with gr.Row():
                    analyze_btn = gr.Button("🔍 Analyze Query", variant="primary")
                    chart_btn = gr.Button("📊 Generate Chart", variant="secondary")
                    reset_btn = gr.Button("🔄 Reset Data", variant="stop")

            with gr.Column(scale=2):
                # Output components
                result_output = gr.Textbox(
                    label="Analysis Result",
                    lines=8,
                    interactive=False,
                )

                with gr.Row():
                    with gr.Column():
                        feasibility_output = gr.Textbox(
                            label="Chart Feasibility Analysis",
                            lines=3,
                            interactive=False,
                        )
                    with gr.Column():
                        recommendation_output = gr.Textbox(
                            label="Chart Recommendation",
                            lines=3,
                            interactive=False,
                        )

                chart_status = gr.Textbox(
                    label="Chart Generation Status",
                    lines=2,
                    interactive=False,
                )

                chart_output = gr.Image(
                    label="Generated Visualization"
                )

        # Example section
        gr.Markdown(
            """
            ### 💡 Example Workflow:

            **Step 1 - Data Analysis Queries:**
            - "What are the top 10 countries by population?"
            - "Calculate the average population of all countries"
            - "Which country has the highest GDP?"

            **Step 2 - Chart Generation:**
            - After running a query, click "Generate Chart" to visualize the results
            - The system will analyze if your query can be effectively visualized
            - Charts work best with comparative, ranking, or relationship-based queries

            **Query Types that work well for charts:**
            - Ranking queries (top/bottom N items)
            - Comparisons between categories
            - Relationships between variables
            - Distribution analysis
            """
        )

        # Event handlers
        analyze_btn.click(
            fn=process_query_only,
            inputs=[file_input, query_input],
            outputs=[result_output, feasibility_output, recommendation_output],
        )

        chart_btn.click(
            fn=generate_chart,
            inputs=[query_input],
            outputs=[chart_status, chart_output],
        )

        # The order of outputs must match the tuple returned by reset_data:
        # the status message goes to chart_status and the trailing values
        # clear the image and the result box.
        reset_btn.click(
            fn=reset_data,
            outputs=[chart_status, feasibility_output, recommendation_output, chart_output, result_output],
        )

        # Allow Enter key to analyze query
        query_input.submit(
            fn=process_query_only,
            inputs=[file_input, query_input],
            outputs=[result_output, feasibility_output, recommendation_output],
        )

    return demo
328
+
329
if __name__ == "__main__":
    # Create and launch the interface. Binding to 0.0.0.0 makes the server
    # listen on all network interfaces, on port 7860, without a public
    # share link.
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )