Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import pandas as pd | |
| import plotly.express as px | |
| import ast | |
| import google.generativeai as genai | |
| from io import StringIO | |
| from dotenv import load_dotenv | |
| # Load environment variables | |
| load_dotenv() | |
| # Configure Genai Key | |
| # genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) | |
| genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | |
| # Function to load Google Gemini Model and provide queries as response | |
| def get_gemini_response(question, prompt): | |
| # model = genai.GenerativeModel('gemini-pro') | |
| model = genai.GenerativeModel("gemini-2.0-flash") | |
| response = model.generate_content([prompt[0], question]) | |
| return response.text.strip() | |
| # Function to load data from CSV | |
| def load_data(): | |
| # This is a sample CSV content. In practice, you'd read this from a file. | |
| csv_content = """ | |
| id,product_name,category,price,stock_quantity,supplier,last_restock_date | |
| 1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01 | |
| 2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15 | |
| 3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10 | |
| 4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20 | |
| 5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05 | |
| 6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28 | |
| 7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15 | |
| 8,Backpack,Bags,39.99,60,TravelGear,2024-02-10 | |
| 9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20 | |
| 10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30 | |
| """ | |
| df = pd.read_csv(StringIO(csv_content)) | |
| df['price'] = pd.to_numeric(df['price'], errors='coerce') | |
| df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce') | |
| return df | |
| # # Function to execute pandas query | |
| # def execute_pandas_query(df, query): | |
| # try: | |
| # # This is a very simple and unsafe way to execute queries. | |
| # # In a real application, you'd need to parse the SQL and translate it to pandas operations. | |
| # result = eval(f"df.{query}") | |
| # return result | |
| # except Exception as e: | |
| # st.error(f"An error occurred: {e}") | |
| # return pd.DataFrame() | |
| # # Define Your Prompt | |
| # prompt = [ | |
| # """ | |
| # You are an expert in converting English questions to pandas DataFrame operations! | |
| # The DataFrame 'df' has the following columns: | |
| # id, product_name, category, price, stock_quantity, supplier, last_restock_date. | |
| # Examples: | |
| # - How many products do we have in total? | |
| # The pandas operation will be: len() | |
| # - What are all the products in the Electronics category? | |
| # The pandas operation will be: query("category == 'Electronics'") | |
| # The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'. | |
| # """ | |
| # ] | |
| # Function to execute pandas query | |
| # def execute_pandas_query(df, query): | |
| # try: | |
| # # Remove any 'df.' prefixes from the query | |
| # query = query.replace('df.', '') | |
| # # Execute the query | |
| # if query.startswith('query'): | |
| # # For filtering operations | |
| # result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\'')) | |
| # elif query.startswith('groupby'): | |
| # # For groupby operations | |
| # group_col, agg_func = query.split('.', 2)[1:] | |
| # result = eval(f"df.groupby('{group_col}').{agg_func}") | |
| # else: | |
| # # For other operations | |
| # result = eval(f"df.{query}") | |
| # return result | |
| # except Exception as e: | |
| # st.error(f"An error occurred: {e}") | |
| # return pd.DataFrame() | |
| # # Define Your Prompt | |
| # prompt = [ | |
| # """ | |
| # You are an expert in converting English questions to pandas DataFrame operations! | |
| # The DataFrame 'df' has the following columns: | |
| # id, product_name, category, price, stock_quantity, supplier, last_restock_date. | |
| # Examples: | |
| # - How many products do we have in total? | |
| # The pandas operation will be: shape[0] | |
| # - What are all the products in the Electronics category? | |
| # The pandas operation will be: query("category == 'Electronics'") | |
| # - What is the average price of products in each category? | |
| # The pandas operation will be: groupby('category').mean()['price'] | |
| # The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix. | |
| # """ | |
| # ] | |
| # Function to safely evaluate a string as a Python expression | |
| def safe_eval(expr, df): | |
| try: | |
| # Parse the expression | |
| parsed = ast.parse(expr, mode='eval') | |
| # Define allowed names | |
| allowed_names = { | |
| 'df': df, | |
| 'query': df.query, | |
| 'groupby': df.groupby, | |
| 'mean': pd.DataFrame.mean, | |
| 'sum': pd.DataFrame.sum, | |
| 'count': pd.DataFrame.count, | |
| 'max': pd.DataFrame.max, | |
| 'min': pd.DataFrame.min | |
| } | |
| # Evaluate the expression | |
| return eval(compile(parsed, '<string>', 'eval'), allowed_names) | |
| except Exception as e: | |
| st.error(f"Error in query execution: {e}") | |
| return pd.DataFrame() | |
| # Function to execute pandas query | |
| def execute_pandas_query(df, query): | |
| try: | |
| # Remove any 'df.' prefixes from the query | |
| query = query.replace('df.', '') | |
| # Execute the query | |
| result = safe_eval(query, df) | |
| # Convert result to DataFrame if it's not already | |
| if not isinstance(result, pd.DataFrame): | |
| if isinstance(result, pd.Series): | |
| result = result.to_frame() | |
| else: | |
| result = pd.DataFrame({'Result': [result]}) | |
| return result | |
| except Exception as e: | |
| st.error(f"An error occurred: {e}") | |
| return pd.DataFrame() | |
| # Define Your Prompt | |
| prompt = [ | |
| """ | |
| You are an expert in converting English questions to pandas DataFrame operations! | |
| The DataFrame 'df' has the following columns: | |
| id, product_name, category, price, stock_quantity, supplier, last_restock_date. | |
| Examples: | |
| - How many products do we have in total? | |
| The pandas operation will be: len(df) | |
| - What are all the products in the Electronics category? | |
| The pandas operation will be: df.query("category == 'Electronics'") | |
| - What is the average price of products in each category? | |
| The pandas operation will be: df.groupby('category')['price'].mean() | |
| The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'. | |
| Always include 'df.' at the beginning of your operations unless you're using a function like len(). | |
| """ | |
| ] | |
| # Streamlit App | |
| st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide") | |
| # Load data | |
| df = load_data() | |
| # Sidebar for user input | |
| st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface") | |
| question = st.sidebar.text_area("Enter your question:", key="input") | |
| submit = st.sidebar.button("Ask Me") | |
| # Main content area | |
| st.title("AutomatiX - Department Store Dashboard") | |
| if submit: | |
| with st.spinner("Generating and Fetching the data..."): | |
| pandas_query = get_gemini_response(question, prompt) | |
| # st.code(pandas_query, language="python") | |
| result_df = execute_pandas_query(df, pandas_query) | |
| if not result_df.empty: | |
| st.success("Query executed successfully!") | |
| # Display data in a table | |
| st.subheader("Data Table") | |
| st.dataframe(result_df) | |
| # # Create visualizations based on the data | |
| st.subheader("Data Visualizations") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| if 'price' in result_df.columns and result_df['price'].notna().any(): | |
| fig = px.histogram(result_df, x='price', title='Price Distribution') | |
| st.plotly_chart(fig, use_container_width=True) | |
| if 'category' in result_df.columns: | |
| category_counts = result_df['category'].value_counts() | |
| fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| if 'last_restock_date' in result_df.columns: | |
| result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M') | |
| restock_counts = result_df['restock_month'].value_counts().sort_index() | |
| fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend') | |
| st.plotly_chart(fig, use_container_width=True) | |
| if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any(): | |
| top_prices = result_df.sort_values('price', ascending=False).head(10) | |
| fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products') | |
| st.plotly_chart(fig, use_container_width=True) | |
| else: | |
| st.warning("No data returned from the query.") | |
| else: | |
| st.info("Enter a question and click 'Ask Me' to get started!") | |
| # Footer | |
| st.sidebar.markdown("---") | |
| st.sidebar.subheader("Example Queries") | |
| st.sidebar.info(""" | |
| Try these example queries to explore the dashboard: | |
| 1. What are the top 5 most expensive products in the Electronics category? | |
| 2. What is the average price and total stock for each category? | |
| 3. List the all the products? | |
| 4. What is the distribution of prices for products supplied by each supplier? | |
| 5. Which products have a stock quantity less than 30? | |
| Feel free to modify these queries or ask your own questions! | |
| """) | |
| st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini") |