Spaces:

ramisn
/

AutomatiX-Depot

Sleeping

App Files Files Community

AutomatiX-Depot / app.py

ramisn

gemini-2.0-flash added

783143d verified 4 months ago

raw

history blame contribute delete

10 kB

	import streamlit as st
	import os
	import pandas as pd
	import plotly.express as px
	import ast
	import google.generativeai as genai
	from io import StringIO
	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv()

	# Configure Genai Key
	# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

	# Function to load Google Gemini Model and provide queries as response
	def get_gemini_response(question, prompt):
	# model = genai.GenerativeModel('gemini-pro')
	model = genai.GenerativeModel("gemini-2.0-flash")
	response = model.generate_content([prompt[0], question])
	return response.text.strip()

	# Function to load data from CSV
	@st.cache_data
	def load_data():
	# This is a sample CSV content. In practice, you'd read this from a file.
	csv_content = """
	id,product_name,category,price,stock_quantity,supplier,last_restock_date
	1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
	2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
	3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
	4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
	5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
	6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
	7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
	8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
	9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
	10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
	"""
	df = pd.read_csv(StringIO(csv_content))
	df['price'] = pd.to_numeric(df['price'], errors='coerce')
	df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
	return df

	# # Function to execute pandas query
	# def execute_pandas_query(df, query):
	# try:
	# # This is a very simple and unsafe way to execute queries.
	# # In a real application, you'd need to parse the SQL and translate it to pandas operations.
	# result = eval(f"df.{query}")
	# return result
	# except Exception as e:
	# st.error(f"An error occurred: {e}")
	# return pd.DataFrame()

	# # Define Your Prompt
	# prompt = [
	# """
	# You are an expert in converting English questions to pandas DataFrame operations!
	# The DataFrame 'df' has the following columns:
	# id, product_name, category, price, stock_quantity, supplier, last_restock_date.

	# Examples:
	# - How many products do we have in total?
	# The pandas operation will be: len()
	# - What are all the products in the Electronics category?
	# The pandas operation will be: query("category == 'Electronics'")

	# The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
	# """
	# ]

	# Function to execute pandas query
	# def execute_pandas_query(df, query):
	# try:
	# # Remove any 'df.' prefixes from the query
	# query = query.replace('df.', '')

	# # Execute the query
	# if query.startswith('query'):
	# # For filtering operations
	# result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
	# elif query.startswith('groupby'):
	# # For groupby operations
	# group_col, agg_func = query.split('.', 2)[1:]
	# result = eval(f"df.groupby('{group_col}').{agg_func}")
	# else:
	# # For other operations
	# result = eval(f"df.{query}")

	# return result
	# except Exception as e:
	# st.error(f"An error occurred: {e}")
	# return pd.DataFrame()

	# # Define Your Prompt
	# prompt = [
	# """
	# You are an expert in converting English questions to pandas DataFrame operations!
	# The DataFrame 'df' has the following columns:
	# id, product_name, category, price, stock_quantity, supplier, last_restock_date.

	# Examples:
	# - How many products do we have in total?
	# The pandas operation will be: shape[0]
	# - What are all the products in the Electronics category?
	# The pandas operation will be: query("category == 'Electronics'")
	# - What is the average price of products in each category?
	# The pandas operation will be: groupby('category').mean()['price']

	# The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
	# """
	# ]

	# Function to safely evaluate a string as a Python expression
	def safe_eval(expr, df):
	try:
	# Parse the expression
	parsed = ast.parse(expr, mode='eval')

	# Define allowed names
	allowed_names = {
	'df': df,
	'query': df.query,
	'groupby': df.groupby,
	'mean': pd.DataFrame.mean,
	'sum': pd.DataFrame.sum,
	'count': pd.DataFrame.count,
	'max': pd.DataFrame.max,
	'min': pd.DataFrame.min
	}

	# Evaluate the expression
	return eval(compile(parsed, '<string>', 'eval'), allowed_names)
	except Exception as e:
	st.error(f"Error in query execution: {e}")
	return pd.DataFrame()

	# Function to execute pandas query
	def execute_pandas_query(df, query):
	try:
	# Remove any 'df.' prefixes from the query
	query = query.replace('df.', '')

	# Execute the query
	result = safe_eval(query, df)

	# Convert result to DataFrame if it's not already
	if not isinstance(result, pd.DataFrame):
	if isinstance(result, pd.Series):
	result = result.to_frame()
	else:
	result = pd.DataFrame({'Result': [result]})

	return result
	except Exception as e:
	st.error(f"An error occurred: {e}")
	return pd.DataFrame()

	# Define Your Prompt
	prompt = [
	"""
	You are an expert in converting English questions to pandas DataFrame operations!
	The DataFrame 'df' has the following columns:
	id, product_name, category, price, stock_quantity, supplier, last_restock_date.

	Examples:
	- How many products do we have in total?
	The pandas operation will be: len(df)
	- What are all the products in the Electronics category?
	The pandas operation will be: df.query("category == 'Electronics'")
	- What is the average price of products in each category?
	The pandas operation will be: df.groupby('category')['price'].mean()

	The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
	Always include 'df.' at the beginning of your operations unless you're using a function like len().
	"""
	]

	# Streamlit App
	st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")

	# Load data
	df = load_data()

	# Sidebar for user input
	st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
	question = st.sidebar.text_area("Enter your question:", key="input")
	submit = st.sidebar.button("Ask Me")

	# Main content area
	st.title("AutomatiX - Department Store Dashboard")

	if submit:
	with st.spinner("Generating and Fetching the data..."):
	pandas_query = get_gemini_response(question, prompt)
	# st.code(pandas_query, language="python")

	result_df = execute_pandas_query(df, pandas_query)

	if not result_df.empty:
	st.success("Query executed successfully!")

	# Display data in a table
	st.subheader("Data Table")
	st.dataframe(result_df)

	# # Create visualizations based on the data
	st.subheader("Data Visualizations")

	col1, col2 = st.columns(2)

	with col1:
	if 'price' in result_df.columns and result_df['price'].notna().any():
	fig = px.histogram(result_df, x='price', title='Price Distribution')
	st.plotly_chart(fig, use_container_width=True)

	if 'category' in result_df.columns:
	category_counts = result_df['category'].value_counts()
	fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	if 'last_restock_date' in result_df.columns:
	result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
	restock_counts = result_df['restock_month'].value_counts().sort_index()
	fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
	st.plotly_chart(fig, use_container_width=True)

	if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
	top_prices = result_df.sort_values('price', ascending=False).head(10)
	fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
	st.plotly_chart(fig, use_container_width=True)
	else:
	st.warning("No data returned from the query.")

	else:
	st.info("Enter a question and click 'Ask Me' to get started!")

	# Footer
	st.sidebar.markdown("---")
	st.sidebar.subheader("Example Queries")
	st.sidebar.info("""
	Try these example queries to explore the dashboard:

	1. What are the top 5 most expensive products in the Electronics category?
	2. What is the average price and total stock for each category?
	3. List the all the products?
	4. What is the distribution of prices for products supplied by each supplier?
	5. Which products have a stock quantity less than 30?

	Feel free to modify these queries or ask your own questions!
	""")
	st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")