Spaces:

ramisn
/

AutomatiX-Depot

Sleeping

File size: 9,967 Bytes

import streamlit as st
import os
import pandas as pd
import plotly.express as px
import ast
import google.generativeai as genai
from io import StringIO
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure Genai Key
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Function to load Google Gemini Model and provide queries as response
def get_gemini_response(question, prompt):
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content([prompt[0], question])
    return response.text.strip()

# Function to load data from CSV
@st.cache_data
def load_data():
    # This is a sample CSV content. In practice, you'd read this from a file.
    csv_content = """
id,product_name,category,price,stock_quantity,supplier,last_restock_date
1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
"""
    df = pd.read_csv(StringIO(csv_content))
    df['price'] = pd.to_numeric(df['price'], errors='coerce')
    df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
    return df

# # Function to execute pandas query
# def execute_pandas_query(df, query):
#     try:
#         # This is a very simple and unsafe way to execute queries.
#         # In a real application, you'd need to parse the SQL and translate it to pandas operations.
#         result = eval(f"df.{query}")
#         return result
#     except Exception as e:
#         st.error(f"An error occurred: {e}")
#         return pd.DataFrame()

# # Define Your Prompt
# prompt = [
#     """
#     You are an expert in converting English questions to pandas DataFrame operations!
#     The DataFrame 'df' has the following columns: 
#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
#     Examples:
#     - How many products do we have in total? 
#       The pandas operation will be: len()
#     - What are all the products in the Electronics category? 
#       The pandas operation will be: query("category == 'Electronics'")
    
#     The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
#     """
# ]

# Function to execute pandas query
# def execute_pandas_query(df, query):
#     try:
#         # Remove any 'df.' prefixes from the query
#         query = query.replace('df.', '')
        
#         # Execute the query
#         if query.startswith('query'):
#             # For filtering operations
#             result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
#         elif query.startswith('groupby'):
#             # For groupby operations
#             group_col, agg_func = query.split('.', 2)[1:]
#             result = eval(f"df.groupby('{group_col}').{agg_func}")
#         else:
#             # For other operations
#             result = eval(f"df.{query}")
        
#         return result
#     except Exception as e:
#         st.error(f"An error occurred: {e}")
#         return pd.DataFrame()

# # Define Your Prompt
# prompt = [
#     """
#     You are an expert in converting English questions to pandas DataFrame operations!
#     The DataFrame 'df' has the following columns: 
#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
#     Examples:
#     - How many products do we have in total? 
#       The pandas operation will be: shape[0]
#     - What are all the products in the Electronics category? 
#       The pandas operation will be: query("category == 'Electronics'")
#     - What is the average price of products in each category?
#       The pandas operation will be: groupby('category').mean()['price']
    
#     The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
#     """
# ]

# Function to safely evaluate a string as a Python expression
def safe_eval(expr, df):
    try:
        # Parse the expression
        parsed = ast.parse(expr, mode='eval')
        
        # Define allowed names
        allowed_names = {
            'df': df,
            'query': df.query,
            'groupby': df.groupby,
            'mean': pd.DataFrame.mean,
            'sum': pd.DataFrame.sum,
            'count': pd.DataFrame.count,
            'max': pd.DataFrame.max,
            'min': pd.DataFrame.min
        }
        
        # Evaluate the expression
        return eval(compile(parsed, '<string>', 'eval'), allowed_names)
    except Exception as e:
        st.error(f"Error in query execution: {e}")
        return pd.DataFrame()

# Function to execute pandas query
def execute_pandas_query(df, query):
    try:
        # Remove any 'df.' prefixes from the query
        query = query.replace('df.', '')
        
        # Execute the query
        result = safe_eval(query, df)
        
        # Convert result to DataFrame if it's not already
        if not isinstance(result, pd.DataFrame):
            if isinstance(result, pd.Series):
                result = result.to_frame()
            else:
                result = pd.DataFrame({'Result': [result]})
        
        return result
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return pd.DataFrame()

# Define Your Prompt
prompt = [
    """
    You are an expert in converting English questions to pandas DataFrame operations!
    The DataFrame 'df' has the following columns: 
    id, product_name, category, price, stock_quantity, supplier, last_restock_date.
    
    Examples:
    - How many products do we have in total? 
      The pandas operation will be: len(df)
    - What are all the products in the Electronics category? 
      The pandas operation will be: df.query("category == 'Electronics'")
    - What is the average price of products in each category?
      The pandas operation will be: df.groupby('category')['price'].mean()
    
    The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
    Always include 'df.' at the beginning of your operations unless you're using a function like len().
    """
]

# Streamlit App
st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")

# Load data
df = load_data()

# Sidebar for user input
st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
question = st.sidebar.text_area("Enter your question:", key="input")
submit = st.sidebar.button("Ask Me")

# Main content area
st.title("AutomatiX - Department Store Dashboard")

if submit:
    with st.spinner("Generating and Fetching the data..."):
        pandas_query = get_gemini_response(question, prompt)
        # st.code(pandas_query, language="python")
        
        result_df = execute_pandas_query(df, pandas_query)
        
        if not result_df.empty:
            st.success("Query executed successfully!")
            
            # Display data in a table
            st.subheader("Data Table")
            st.dataframe(result_df)
            
            # # Create visualizations based on the data
            st.subheader("Data Visualizations")
            
            col1, col2 = st.columns(2)
            
            with col1:
                if 'price' in result_df.columns and result_df['price'].notna().any():
                    fig = px.histogram(result_df, x='price', title='Price Distribution')
                    st.plotly_chart(fig, use_container_width=True)
                
                if 'category' in result_df.columns:
                    category_counts = result_df['category'].value_counts()
                    fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
                    st.plotly_chart(fig, use_container_width=True)
            
            with col2:
                if 'last_restock_date' in result_df.columns:
                    result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
                    restock_counts = result_df['restock_month'].value_counts().sort_index()
                    fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
                    st.plotly_chart(fig, use_container_width=True)
                
                if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
                    top_prices = result_df.sort_values('price', ascending=False).head(10)
                    fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
                    st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data returned from the query.")

else:
    st.info("Enter a question and click 'Ask Me' to get started!")

# Footer
st.sidebar.markdown("---")
st.sidebar.subheader("Example Queries")
st.sidebar.info("""
Try these example queries to explore the dashboard:

1. What are the top 5 most expensive products in the Electronics category?
2. What is the average price and total stock for each category?
3. List the all the products?
4. What is the distribution of prices for products supplied by each supplier?
5. Which products have a stock quantity less than 30?

Feel free to modify these queries or ask your own questions!
""")
st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")