Spaces:
Sleeping
Sleeping
File size: 9,967 Bytes
81149e0 823be3f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
import streamlit as st
import os
import pandas as pd
import plotly.express as px
import ast
import google.generativeai as genai
from io import StringIO
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Configure Genai Key
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
# Function to load Google Gemini Model and provide queries as response
def get_gemini_response(question, prompt):
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content([prompt[0], question])
return response.text.strip()
# Function to load data from CSV
@st.cache_data
def load_data():
# This is a sample CSV content. In practice, you'd read this from a file.
csv_content = """
id,product_name,category,price,stock_quantity,supplier,last_restock_date
1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
"""
df = pd.read_csv(StringIO(csv_content))
df['price'] = pd.to_numeric(df['price'], errors='coerce')
df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
return df
# # Function to execute pandas query
# def execute_pandas_query(df, query):
# try:
# # This is a very simple and unsafe way to execute queries.
# # In a real application, you'd need to parse the SQL and translate it to pandas operations.
# result = eval(f"df.{query}")
# return result
# except Exception as e:
# st.error(f"An error occurred: {e}")
# return pd.DataFrame()
# # Define Your Prompt
# prompt = [
# """
# You are an expert in converting English questions to pandas DataFrame operations!
# The DataFrame 'df' has the following columns:
# id, product_name, category, price, stock_quantity, supplier, last_restock_date.
# Examples:
# - How many products do we have in total?
# The pandas operation will be: len()
# - What are all the products in the Electronics category?
# The pandas operation will be: query("category == 'Electronics'")
# The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
# """
# ]
# Function to execute pandas query
# def execute_pandas_query(df, query):
# try:
# # Remove any 'df.' prefixes from the query
# query = query.replace('df.', '')
# # Execute the query
# if query.startswith('query'):
# # For filtering operations
# result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
# elif query.startswith('groupby'):
# # For groupby operations
# group_col, agg_func = query.split('.', 2)[1:]
# result = eval(f"df.groupby('{group_col}').{agg_func}")
# else:
# # For other operations
# result = eval(f"df.{query}")
# return result
# except Exception as e:
# st.error(f"An error occurred: {e}")
# return pd.DataFrame()
# # Define Your Prompt
# prompt = [
# """
# You are an expert in converting English questions to pandas DataFrame operations!
# The DataFrame 'df' has the following columns:
# id, product_name, category, price, stock_quantity, supplier, last_restock_date.
# Examples:
# - How many products do we have in total?
# The pandas operation will be: shape[0]
# - What are all the products in the Electronics category?
# The pandas operation will be: query("category == 'Electronics'")
# - What is the average price of products in each category?
# The pandas operation will be: groupby('category').mean()['price']
# The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
# """
# ]
# Function to safely evaluate a string as a Python expression
def safe_eval(expr, df):
try:
# Parse the expression
parsed = ast.parse(expr, mode='eval')
# Define allowed names
allowed_names = {
'df': df,
'query': df.query,
'groupby': df.groupby,
'mean': pd.DataFrame.mean,
'sum': pd.DataFrame.sum,
'count': pd.DataFrame.count,
'max': pd.DataFrame.max,
'min': pd.DataFrame.min
}
# Evaluate the expression
return eval(compile(parsed, '<string>', 'eval'), allowed_names)
except Exception as e:
st.error(f"Error in query execution: {e}")
return pd.DataFrame()
# Function to execute pandas query
def execute_pandas_query(df, query):
try:
# Remove any 'df.' prefixes from the query
query = query.replace('df.', '')
# Execute the query
result = safe_eval(query, df)
# Convert result to DataFrame if it's not already
if not isinstance(result, pd.DataFrame):
if isinstance(result, pd.Series):
result = result.to_frame()
else:
result = pd.DataFrame({'Result': [result]})
return result
except Exception as e:
st.error(f"An error occurred: {e}")
return pd.DataFrame()
# Define Your Prompt
prompt = [
"""
You are an expert in converting English questions to pandas DataFrame operations!
The DataFrame 'df' has the following columns:
id, product_name, category, price, stock_quantity, supplier, last_restock_date.
Examples:
- How many products do we have in total?
The pandas operation will be: len(df)
- What are all the products in the Electronics category?
The pandas operation will be: df.query("category == 'Electronics'")
- What is the average price of products in each category?
The pandas operation will be: df.groupby('category')['price'].mean()
The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
Always include 'df.' at the beginning of your operations unless you're using a function like len().
"""
]
# Streamlit App
st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")
# Load data
df = load_data()
# Sidebar for user input
st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
question = st.sidebar.text_area("Enter your question:", key="input")
submit = st.sidebar.button("Ask Me")
# Main content area
st.title("AutomatiX - Department Store Dashboard")
if submit:
with st.spinner("Generating and Fetching the data..."):
pandas_query = get_gemini_response(question, prompt)
# st.code(pandas_query, language="python")
result_df = execute_pandas_query(df, pandas_query)
if not result_df.empty:
st.success("Query executed successfully!")
# Display data in a table
st.subheader("Data Table")
st.dataframe(result_df)
# # Create visualizations based on the data
st.subheader("Data Visualizations")
col1, col2 = st.columns(2)
with col1:
if 'price' in result_df.columns and result_df['price'].notna().any():
fig = px.histogram(result_df, x='price', title='Price Distribution')
st.plotly_chart(fig, use_container_width=True)
if 'category' in result_df.columns:
category_counts = result_df['category'].value_counts()
fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
st.plotly_chart(fig, use_container_width=True)
with col2:
if 'last_restock_date' in result_df.columns:
result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
restock_counts = result_df['restock_month'].value_counts().sort_index()
fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
st.plotly_chart(fig, use_container_width=True)
if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
top_prices = result_df.sort_values('price', ascending=False).head(10)
fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
st.plotly_chart(fig, use_container_width=True)
else:
st.warning("No data returned from the query.")
else:
st.info("Enter a question and click 'Ask Me' to get started!")
# Footer
st.sidebar.markdown("---")
st.sidebar.subheader("Example Queries")
st.sidebar.info("""
Try these example queries to explore the dashboard:
1. What are the top 5 most expensive products in the Electronics category?
2. What is the average price and total stock for each category?
3. List the all the products?
4. What is the distribution of prices for products supplied by each supplier?
5. Which products have a stock quantity less than 30?
Feel free to modify these queries or ask your own questions!
""")
st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini") |