Entz committed on
Commit d3a543b · verified · 1 Parent(s): d633dfa

Upload 6 files

.env ADDED
@@ -0,0 +1 @@
+ GOOGLE_API_KEY=your_api_key
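A quick way to confirm the key is picked up before starting the API is a throwaway check along these lines (the script name check_env.py is hypothetical and not part of this commit; it assumes the .env file above sits in the working directory and that the placeholder has been replaced with a real Gemini API key):

# check_env.py - hypothetical sanity check, not part of this commit
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
key = os.getenv("GOOGLE_API_KEY")
print("GOOGLE_API_KEY loaded:", bool(key) and key != "your_api_key")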
backend.py ADDED
@@ -0,0 +1,177 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import sqlite3
+ import pandas as pd
+ import os
+ from dotenv import load_dotenv
+ import google.generativeai as genai
+
+ app = FastAPI()
+
+ # Load environment variables and configure Genai
+ load_dotenv()
+ genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
+
+ # Define the schema for the incoming request
+ class Query(BaseModel):
+     question: str
+     data_source: str
+
+ def get_gemini_response(question, prompt):
+     model = genai.GenerativeModel('gemini-1.5-pro')  # https://ai.google.dev/pricing?authuser=1#1_5pro
+     response = model.generate_content([prompt, question])
+     return response.text
+
+ # Update column and table names for the new dataset
+ sql_cols_human = 'REQUESTID', 'DATETIMEINIT', 'SOURCE', 'DESCRIPTION', 'REQCATEGORY', 'STATUS', 'REFERREDTO', 'DATETIMECLOSED', 'City', 'State', 'Ward', 'Postcode'
+ csv_columns_human = ['REQUESTID', 'DATETIMEINIT', 'SOURCE', 'DESCRIPTION', 'REQCATEGORY', 'STATUS', 'REFERREDTO', 'DATETIMECLOSED', 'City', 'State', 'Ward', 'Postcode']
+ sql_cols = 'REQUESTID', 'DATETIMEINIT', 'SOURCE', 'DESCRIPTION', 'REQCATEGORY', 'STATUS', 'REFERREDTO', 'DATETIMECLOSED', 'City', 'State', 'Ward', 'Postcode'
+ # csv_columns = ["REQUESTID", "DATETIMEINIT", "SOURCE", "DESCRIPTION", "REQCATEGORY", "STATUS", "REFERREDTO", "DATETIMECLOSED", "PROBADDRESS", "City", "State", "Ward", "Postcode"]
+
+ def get_csv_columns():
+     df = pd.read_csv('wandsworth_callcenter_sampled.csv')
+     return df.columns.tolist()
+
+ csv_columns = get_csv_columns()
+ print(csv_columns)
+
+ sql_prompt = f"""
+ You are an expert in converting English questions to SQLite code!
+ The SQLite database has the name CALLCENTER_REQUESTS and has the following Columns: {', '.join(sql_cols)}
+
+ Here are some key details about the dataset:
+ - `SOURCE`: Phone, Online Form, FixMyStreet, Email, Telephone/Email, Telephone Voicemail, Other, Local Council Office.
+ - `REQCATEGORY`: Blocked Drains, Council Building Maintenance, Fly-Tipping, Street and Pavement Maintenance, Recycling, Traffic Signage Issues, Parks Maintenance, Graffiti Removal, Tree Maintenance.
+ - `STATUS`: Resolved, In Progress, Cancelled by Customer, Referred to External Agency, Work Order Created, Under Review.
+ - `REFERREDTO`: Council Enforcement, Transport for London (TfL), Thames Water, Royal Mail, UK Power Networks.
+
+ For example:
+ - Would you please list all unresolved calls? command: SELECT * FROM CALLCENTER_REQUESTS WHERE STATUS='In Progress';
+ - Would you please count the total number of calls? command: SELECT COUNT(*) FROM CALLCENTER_REQUESTS;
+ - List all unique wards please? command: SELECT DISTINCT Ward FROM CALLCENTER_REQUESTS;
+
+ Also, the SQL code should not be wrapped in ''' at the beginning or end, and should not include the word SQL in the output.
+ Ensure that you only generate valid SQLite database queries, not pandas or Python code.
+ """
+
+
+
+ csv_prompt = f"""
+ You are an expert in analyzing CSV data and converting English questions to pandas query syntax.
+ The CSV file is named 'wandsworth_callcenter_sampled.csv' and contains residents' call information for Wandsworth Council.
+ The available columns in the CSV file are: {', '.join(csv_columns)}
+
+ Here are some key details about the dataset:
+ - `SOURCE`: Phone, Online Form, FixMyStreet, Email, Telephone/Email, Telephone Voicemail, Other, Local Council Office.
+ - `REQCATEGORY`: Blocked Drains, Council Building Maintenance, Fly-Tipping, Street and Pavement Maintenance, Recycling, Traffic Signage Issues, Parks Maintenance, Graffiti Removal, Tree Maintenance.
+ - `STATUS`: Resolved, In Progress, Cancelled by Customer, Referred to External Agency, Work Order Created, Under Review.
+ - `REFERREDTO`: Council Enforcement, Transport for London (TfL), Thames Water, Royal Mail, UK Power Networks.
+
+ For example:
+ - How many calls in total? len(df.REQUESTID)
+ - What are all the calls referred to external agencies? df[df['REFERREDTO'].notna()]
+ - Would you please show the top 5 most frequent call categories? df['REQCATEGORY'].value_counts().head(5)
+
+ Please ensure:
+ 1. Always reference columns using `df['COLUMN_NAME']`.
+ 2. Do not use Python lists like `['COLUMN_NAME']` to refer to columns.
+ 3. Provide only the pandas query syntax without any additional explanation or markdown formatting.
+ Make sure to use only the columns that are available in the CSV file.
+ Ensure that you only generate valid pandas queries. NO SQL or other types of code/syntax.
+
+ """
+
+ def execute_sql_query(query):
+     conn = sqlite3.connect('wandsworth_callcenter_sampled.db')
+     try:
+         cursor = conn.cursor()
+         cursor.execute(query)
+         result = cursor.fetchall()
+         return result
+     except sqlite3.Error as e:
+         # Capture and explain SQL errors
+         sql_error_message = str(e)
+         # Send the error message back to Gemini for explanation
+         error_prompt = f"""
+         You are an expert SQL debugger and an assistant to the director. An error occurred while executing the following query:
+         {query}
+
+         The error was: {sql_error_message}
+         Please explain the error in simple layman's terms.
+         Do not include any programming code, e.g. SQL or Python syntax, etc.
+         Finally, politely remind the user that the dataset only contains information about the following columns: {', '.join(sql_cols_human)}.
+         Keep the explanation short and polite.
+         """
+         explanation = get_gemini_response("", error_prompt)
+         raise HTTPException(status_code=400, detail={"error": sql_error_message, "explanation": explanation})
+     finally:
+         conn.close()
+
+
+
+
+ def execute_pandas_query(query):
+     df = pd.read_csv('wandsworth_callcenter_sampled.csv')
+     df.columns = df.columns.str.upper()  # Normalize column names to uppercase
+     print(f"df is loaded. The first line is: {df.head(1)}")
+
+     # Remove code block indicators (e.g., ```python and ```)
+     query = query.replace("```python", "").replace("```", "").strip()
+
+     # Split query into lines
+     query_lines = query.split("\n")  # Split into individual statements
+     try:
+         result = None
+         exec_context = {'df': df, 'pd': pd}  # Execution context for exec()
+         for line in query_lines[:-1]:
+             line = line.strip()  # Remove extra spaces
+             if line:  # Skip empty lines
+                 print(f"Executing line: {line}")
+                 exec(line, exec_context)  # Execute each preceding line in the context
+
+         # Evaluate the final line as an expression to obtain the result
+         result = eval(query_lines[-1].strip(), exec_context)
+
+         print(f"Query Result Before Serialization: {result}")
+
+         # Handle DataFrame results
+         if isinstance(result, pd.DataFrame):
+             # Replace NaN and infinite values with JSON-compliant values
+             result = result.replace([float('inf'), -float('inf')], None).fillna(value="N/A")
+             return result.to_dict(orient='records')
+
+         # Handle Series results
+         elif isinstance(result, pd.Series):
+             result = result.replace([float('inf'), -float('inf')], None).fillna(value="N/A")
+             return result.to_dict()
+
+         # Handle scalar results
+         else:
+             return result
+
+     except Exception as e:
+         print(f"Error: {e}")
+         raise HTTPException(status_code=400, detail=f"Pandas Error: {str(e)}")
+
+
+
+
+
+ @app.post("/query")
+ async def process_query(query: Query):
+     if query.data_source == "SQL Database":
+         ai_response = get_gemini_response(query.question, sql_prompt)
+         try:
+             result = execute_sql_query(ai_response)
+             return {"query": ai_response, "result": result}
+         except HTTPException as e:
+             error_detail = e.detail
+             return {"query": ai_response, "error": error_detail["error"], "explanation": error_detail["explanation"]}
+     else:  # CSV Data
+         ai_response = get_gemini_response(query.question, csv_prompt)
+         print(f"\n\nai_response: {ai_response}")
+         try:
+             result = execute_pandas_query(ai_response)
+             return {"query": ai_response, "result": result, "columns": csv_columns}
+         except HTTPException as e:
+             raise HTTPException(status_code=400, detail=f"Error in pandas query: {e.detail}")
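For reference, a minimal sketch of how the /query endpoint might be exercised directly, assuming the backend has been started locally with `uvicorn backend:app --port 8000` (the same URL frontend.py hard-codes); the script name and example question are illustrative only and not part of this commit:

# smoke_test.py - hypothetical helper, not part of this commit
# Assumes the API is running locally: uvicorn backend:app --port 8000
import requests

payload = {
    "question": "Count the total number of calls",
    "data_source": "SQL Database",  # or "CSV Database" to route through the pandas path
}
response = requests.post("http://localhost:8000/query", json=payload, timeout=60)
response.raise_for_status()
data = response.json()

print("Generated query:", data["query"])
# Successful SQL queries return "result"; failed SQL queries return
# "error" and "explanation" instead (see process_query above).
if "result" in data:
    print("Result:", data["result"])
else:
    print("Explanation:", data.get("explanation"))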
frontend.py ADDED
@@ -0,0 +1,142 @@
+ import streamlit as st
+ import requests
+ import pandas as pd
+
+ # Page Configuration
+ st.set_page_config(
+     page_title="CallDataAI - Wandsworth Council Call Center Analysis",
+     page_icon="📞",
+     layout="wide",
+     initial_sidebar_state="expanded",
+ )
+
+ # Sidebar
+ st.sidebar.title("📞 CallDataAI")
+ st.sidebar.markdown(
+     """
+     **Welcome to CallDataAI**, your AI-powered assistant for analyzing Wandsworth Council's Call Center data. Use the menu below to:
+     - Select the data source (SQL/CSV)
+     - Run pre-defined or custom queries
+     - Gain actionable insights
+     """
+ )
+
+ # Data source selection
+ st.sidebar.markdown("### Select Data Source:")
+ data_source = st.sidebar.radio("", ('SQL Database', 'CSV Database'))
+
+ # Common queries section
+ st.sidebar.markdown("### Common Queries:")
+ common_queries = {
+     'SQL Database': [
+         'List all unique Source',
+         'List all unique request categories',
+         'List all unique wards and their postcodes',
+         'Count the total number of calls',
+         'List all unresolved calls',
+         'What is the total number of requests per year?',
+         'What is the average time (days) to close a request per request category?',
+     ],
+     'CSV Database': [
+         'Count total number of call requests',
+         'List all calls referred to external agencies',
+         'Show top 5 most frequent call categories',
+     ]
+ }
+
+ for idx, query in enumerate(common_queries[data_source]):
+     if st.sidebar.button(query, key=f"query_button_{idx}"):  # Add unique key
+         st.session_state["common_query"] = query
+
+
+
+
+ # Title and Description
+ st.title("📞 CallDataAI - Wandsworth Council Call Center Analysis")
+ st.markdown(
+     """
+     **CallDataAI** is an AI-powered chatbot designed for analyzing Wandsworth Council's Call Center data.
+     Input natural language queries to explore the data and gain actionable insights.
+     """
+ )
+
+ # Input Section
+ with st.container():
+     st.markdown("### Enter Your Question")
+     question = st.text_input(
+         "Input:", key="input", value=st.session_state.get("common_query", ""), placeholder="Type your query here..."
+     )
+     submit = st.button("Submit", type="primary")
+
+ # Main Content
+ if submit:
+     # Send request to FastAPI backend
+     with st.spinner("Processing your request..."):
+         response = requests.post(
+             "http://localhost:8000/query", json={"question": question, "data_source": data_source}
+         )
+
+     # Handle response
+     if response.status_code == 200:
+         data = response.json()
+
+         # Error Handling
+         if "error" in data:
+             with st.expander("Error Explanation"):
+                 st.error(data["explanation"])
+
+         # Display Results
+         else:
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 st.markdown(f"### Generated {'SQL' if data_source == 'SQL Database' else 'Pandas'} Query")
+                 st.code(data["query"], language="sql" if data_source == "SQL Database" else "python")
+
+             with col2:
+                 st.markdown("### Query Results")
+                 result = data["result"]
+
+                 if isinstance(result, list) and len(result) > 0:
+                     if isinstance(result[0], dict):
+                         df = pd.DataFrame(result)
+                         st.dataframe(df)
+                     elif isinstance(result[0], list):
+                         df = pd.DataFrame(result)
+                         st.dataframe(df)
+                     else:
+                         st.write(result)
+
+                 elif isinstance(result, dict):
+                     st.json(result)
+
+                 else:
+                     st.write(result)
+
+             if data_source == "CSV Database":
+                 st.markdown("### Available CSV Columns")
+                 st.write(data["columns"])
+
+         # Update chat history in session state
+         if "chat_history" not in st.session_state:
+             st.session_state["chat_history"] = []
+
+         st.session_state["chat_history"].append(f"🔧({data_source}): {question}")
+         st.session_state["chat_history"].append(f"🤖: {data['query']}")
+
+     else:
+         st.error(f"Error processing your request: {response.text}")
+
+ # Chat History Section
+ with st.container():
+     st.markdown("### Chat History")
+     if "chat_history" in st.session_state:
+         for message in st.session_state["chat_history"]:
+             st.text(message)
+     if st.button("Clear Chat History"):
+         st.session_state["chat_history"] = []
+         st.success("Chat history cleared!")
+
+ st.markdown("---")
+ st.markdown("Developed by Lorentz Yeung, Christmas 2024")
+ st.markdown("Contact: [email protected] or [email protected]")
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastapi==0.110.3
+ google-generativeai==0.8.3
+ pandas==2.2.3
+ pydantic==2.9.2
+ python-dotenv==1.0.1
+ uvicorn==0.30.6
+ streamlit==1.40.1
+ requests==2.32.3
+
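The commit does not include a launcher, so here is a hypothetical run.py sketch for starting both processes after `pip install -r requirements.txt`; the script name is an assumption, and the port matches the URL used in frontend.py:

# run.py - hypothetical convenience launcher, not part of this commit
import subprocess
import sys

# Start the FastAPI backend on the port frontend.py expects (8000).
backend = subprocess.Popen(
    [sys.executable, "-m", "uvicorn", "backend:app", "--port", "8000"]
)
try:
    # Run the Streamlit UI in the foreground; Ctrl+C stops both processes.
    subprocess.run([sys.executable, "-m", "streamlit", "run", "frontend.py"], check=True)
finally:
    backend.terminate()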
wandsworth_callcenter_sampled.csv ADDED
The diff for this file is too large to render. See raw diff
 
wandsworth_callcenter_sampled.db ADDED
Binary file (197 kB).