import streamlit as st import os import pandas as pd import plotly.express as px import ast import google.generativeai as genai from io import StringIO from dotenv import load_dotenv # Load environment variables load_dotenv() # Configure Genai Key # genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # Function to load Google Gemini Model and provide queries as response def get_gemini_response(question, prompt): model = genai.GenerativeModel('gemini-pro') response = model.generate_content([prompt[0], question]) return response.text.strip() # Function to load data from CSV @st.cache_data def load_data(): # This is a sample CSV content. In practice, you'd read this from a file. csv_content = """ id,product_name,category,price,stock_quantity,supplier,last_restock_date 1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01 2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15 3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10 4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20 5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05 6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28 7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15 8,Backpack,Bags,39.99,60,TravelGear,2024-02-10 9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20 10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30 """ df = pd.read_csv(StringIO(csv_content)) df['price'] = pd.to_numeric(df['price'], errors='coerce') df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce') return df # # Function to execute pandas query # def execute_pandas_query(df, query): # try: # # This is a very simple and unsafe way to execute queries. # # In a real application, you'd need to parse the SQL and translate it to pandas operations. # result = eval(f"df.{query}") # return result # except Exception as e: # st.error(f"An error occurred: {e}") # return pd.DataFrame() # # Define Your Prompt # prompt = [ # """ # You are an expert in converting English questions to pandas DataFrame operations! # The DataFrame 'df' has the following columns: # id, product_name, category, price, stock_quantity, supplier, last_restock_date. # Examples: # - How many products do we have in total? # The pandas operation will be: len() # - What are all the products in the Electronics category? # The pandas operation will be: query("category == 'Electronics'") # The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'. # """ # ] # Function to execute pandas query # def execute_pandas_query(df, query): # try: # # Remove any 'df.' prefixes from the query # query = query.replace('df.', '') # # Execute the query # if query.startswith('query'): # # For filtering operations # result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\'')) # elif query.startswith('groupby'): # # For groupby operations # group_col, agg_func = query.split('.', 2)[1:] # result = eval(f"df.groupby('{group_col}').{agg_func}") # else: # # For other operations # result = eval(f"df.{query}") # return result # except Exception as e: # st.error(f"An error occurred: {e}") # return pd.DataFrame() # # Define Your Prompt # prompt = [ # """ # You are an expert in converting English questions to pandas DataFrame operations! # The DataFrame 'df' has the following columns: # id, product_name, category, price, stock_quantity, supplier, last_restock_date. # Examples: # - How many products do we have in total? # The pandas operation will be: shape[0] # - What are all the products in the Electronics category? # The pandas operation will be: query("category == 'Electronics'") # - What is the average price of products in each category? # The pandas operation will be: groupby('category').mean()['price'] # The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix. # """ # ] # Function to safely evaluate a string as a Python expression def safe_eval(expr, df): try: # Parse the expression parsed = ast.parse(expr, mode='eval') # Define allowed names allowed_names = { 'df': df, 'query': df.query, 'groupby': df.groupby, 'mean': pd.DataFrame.mean, 'sum': pd.DataFrame.sum, 'count': pd.DataFrame.count, 'max': pd.DataFrame.max, 'min': pd.DataFrame.min } # Evaluate the expression return eval(compile(parsed, '', 'eval'), allowed_names) except Exception as e: st.error(f"Error in query execution: {e}") return pd.DataFrame() # Function to execute pandas query def execute_pandas_query(df, query): try: # Remove any 'df.' prefixes from the query query = query.replace('df.', '') # Execute the query result = safe_eval(query, df) # Convert result to DataFrame if it's not already if not isinstance(result, pd.DataFrame): if isinstance(result, pd.Series): result = result.to_frame() else: result = pd.DataFrame({'Result': [result]}) return result except Exception as e: st.error(f"An error occurred: {e}") return pd.DataFrame() # Define Your Prompt prompt = [ """ You are an expert in converting English questions to pandas DataFrame operations! The DataFrame 'df' has the following columns: id, product_name, category, price, stock_quantity, supplier, last_restock_date. Examples: - How many products do we have in total? The pandas operation will be: len(df) - What are all the products in the Electronics category? The pandas operation will be: df.query("category == 'Electronics'") - What is the average price of products in each category? The pandas operation will be: df.groupby('category')['price'].mean() The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'. Always include 'df.' at the beginning of your operations unless you're using a function like len(). """ ] # Streamlit App st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide") # Load data df = load_data() # Sidebar for user input st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface") question = st.sidebar.text_area("Enter your question:", key="input") submit = st.sidebar.button("Ask Me") # Main content area st.title("AutomatiX - Department Store Dashboard") if submit: with st.spinner("Generating and Fetching the data..."): pandas_query = get_gemini_response(question, prompt) # st.code(pandas_query, language="python") result_df = execute_pandas_query(df, pandas_query) if not result_df.empty: st.success("Query executed successfully!") # Display data in a table st.subheader("Data Table") st.dataframe(result_df) # # Create visualizations based on the data st.subheader("Data Visualizations") col1, col2 = st.columns(2) with col1: if 'price' in result_df.columns and result_df['price'].notna().any(): fig = px.histogram(result_df, x='price', title='Price Distribution') st.plotly_chart(fig, use_container_width=True) if 'category' in result_df.columns: category_counts = result_df['category'].value_counts() fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category') st.plotly_chart(fig, use_container_width=True) with col2: if 'last_restock_date' in result_df.columns: result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M') restock_counts = result_df['restock_month'].value_counts().sort_index() fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend') st.plotly_chart(fig, use_container_width=True) if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any(): top_prices = result_df.sort_values('price', ascending=False).head(10) fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products') st.plotly_chart(fig, use_container_width=True) else: st.warning("No data returned from the query.") else: st.info("Enter a question and click 'Ask Me' to get started!") # Footer st.sidebar.markdown("---") st.sidebar.subheader("Example Queries") st.sidebar.info(""" Try these example queries to explore the dashboard: 1. What are the top 5 most expensive products in the Electronics category? 2. What is the average price and total stock for each category? 3. List the all the products? 4. What is the distribution of prices for products supplied by each supplier? 5. Which products have a stock quantity less than 30? Feel free to modify these queries or ask your own questions! """) st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")