File size: 9,967 Bytes
81149e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
823be3f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import streamlit as st
import os
import pandas as pd
import plotly.express as px
import ast
import google.generativeai as genai
from io import StringIO
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure Genai Key
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Function to load Google Gemini Model and provide queries as response
def get_gemini_response(question, prompt):
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content([prompt[0], question])
    return response.text.strip()

# Function to load data from CSV
@st.cache_data
def load_data():
    # This is a sample CSV content. In practice, you'd read this from a file.
    csv_content = """
id,product_name,category,price,stock_quantity,supplier,last_restock_date
1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
"""
    df = pd.read_csv(StringIO(csv_content))
    df['price'] = pd.to_numeric(df['price'], errors='coerce')
    df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
    return df

# # Function to execute pandas query
# def execute_pandas_query(df, query):
#     try:
#         # This is a very simple and unsafe way to execute queries.
#         # In a real application, you'd need to parse the SQL and translate it to pandas operations.
#         result = eval(f"df.{query}")
#         return result
#     except Exception as e:
#         st.error(f"An error occurred: {e}")
#         return pd.DataFrame()

# # Define Your Prompt
# prompt = [
#     """
#     You are an expert in converting English questions to pandas DataFrame operations!
#     The DataFrame 'df' has the following columns: 
#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
#     Examples:
#     - How many products do we have in total? 
#       The pandas operation will be: len()
#     - What are all the products in the Electronics category? 
#       The pandas operation will be: query("category == 'Electronics'")
    
#     The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
#     """
# ]

# Function to execute pandas query
# def execute_pandas_query(df, query):
#     try:
#         # Remove any 'df.' prefixes from the query
#         query = query.replace('df.', '')
        
#         # Execute the query
#         if query.startswith('query'):
#             # For filtering operations
#             result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
#         elif query.startswith('groupby'):
#             # For groupby operations
#             group_col, agg_func = query.split('.', 2)[1:]
#             result = eval(f"df.groupby('{group_col}').{agg_func}")
#         else:
#             # For other operations
#             result = eval(f"df.{query}")
        
#         return result
#     except Exception as e:
#         st.error(f"An error occurred: {e}")
#         return pd.DataFrame()

# # Define Your Prompt
# prompt = [
#     """
#     You are an expert in converting English questions to pandas DataFrame operations!
#     The DataFrame 'df' has the following columns: 
#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
#     Examples:
#     - How many products do we have in total? 
#       The pandas operation will be: shape[0]
#     - What are all the products in the Electronics category? 
#       The pandas operation will be: query("category == 'Electronics'")
#     - What is the average price of products in each category?
#       The pandas operation will be: groupby('category').mean()['price']
    
#     The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
#     """
# ]

# Function to safely evaluate a string as a Python expression
def safe_eval(expr, df):
    try:
        # Parse the expression
        parsed = ast.parse(expr, mode='eval')
        
        # Define allowed names
        allowed_names = {
            'df': df,
            'query': df.query,
            'groupby': df.groupby,
            'mean': pd.DataFrame.mean,
            'sum': pd.DataFrame.sum,
            'count': pd.DataFrame.count,
            'max': pd.DataFrame.max,
            'min': pd.DataFrame.min
        }
        
        # Evaluate the expression
        return eval(compile(parsed, '<string>', 'eval'), allowed_names)
    except Exception as e:
        st.error(f"Error in query execution: {e}")
        return pd.DataFrame()

# Function to execute pandas query
def execute_pandas_query(df, query):
    try:
        # Remove any 'df.' prefixes from the query
        query = query.replace('df.', '')
        
        # Execute the query
        result = safe_eval(query, df)
        
        # Convert result to DataFrame if it's not already
        if not isinstance(result, pd.DataFrame):
            if isinstance(result, pd.Series):
                result = result.to_frame()
            else:
                result = pd.DataFrame({'Result': [result]})
        
        return result
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return pd.DataFrame()

# Define Your Prompt
prompt = [
    """
    You are an expert in converting English questions to pandas DataFrame operations!
    The DataFrame 'df' has the following columns: 
    id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
    Examples:
    - How many products do we have in total? 
      The pandas operation will be: len(df)
    - What are all the products in the Electronics category? 
      The pandas operation will be: df.query("category == 'Electronics'")
    - What is the average price of products in each category?
      The pandas operation will be: df.groupby('category')['price'].mean()
    
    The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
    Always include 'df.' at the beginning of your operations unless you're using a function like len().
    """
]

# Streamlit App
st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")

# Load data
df = load_data()

# Sidebar for user input
st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
question = st.sidebar.text_area("Enter your question:", key="input")
submit = st.sidebar.button("Ask Me")

# Main content area
st.title("AutomatiX - Department Store Dashboard")

if submit:
    with st.spinner("Generating and Fetching the data..."):
        pandas_query = get_gemini_response(question, prompt)
        # st.code(pandas_query, language="python")
        
        result_df = execute_pandas_query(df, pandas_query)
        
        if not result_df.empty:
            st.success("Query executed successfully!")
            
            # Display data in a table
            st.subheader("Data Table")
            st.dataframe(result_df)
            
            # # Create visualizations based on the data
            st.subheader("Data Visualizations")
            
            col1, col2 = st.columns(2)
            
            with col1:
                if 'price' in result_df.columns and result_df['price'].notna().any():
                    fig = px.histogram(result_df, x='price', title='Price Distribution')
                    st.plotly_chart(fig, use_container_width=True)
                
                if 'category' in result_df.columns:
                    category_counts = result_df['category'].value_counts()
                    fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
                    st.plotly_chart(fig, use_container_width=True)
            
            with col2:
                if 'last_restock_date' in result_df.columns:
                    result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
                    restock_counts = result_df['restock_month'].value_counts().sort_index()
                    fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
                    st.plotly_chart(fig, use_container_width=True)
                
                if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
                    top_prices = result_df.sort_values('price', ascending=False).head(10)
                    fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
                    st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data returned from the query.")

else:
    st.info("Enter a question and click 'Ask Me' to get started!")

# Footer
st.sidebar.markdown("---")
st.sidebar.subheader("Example Queries")
st.sidebar.info("""
Try these example queries to explore the dashboard:

1. What are the top 5 most expensive products in the Electronics category?
2. What is the average price and total stock for each category?
3. List the all the products?
4. What is the distribution of prices for products supplied by each supplier?
5. Which products have a stock quantity less than 30?

Feel free to modify these queries or ask your own questions!
""")
st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")