Spaces:
Sleeping
Sleeping
Rami
committed on
Commit
·
fd1fd02
1
Parent(s):
8c34bef
CSV DATA Added
Browse files- app_csv.py +129 -0
app_csv.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
import google.generativeai as genai
|
| 6 |
+
from io import StringIO
|
| 7 |
+
|
| 8 |
+
# Hand the Gemini client its API key, read from the GOOGLE_API_KEY
# environment variable (None is passed through when the variable is unset).
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
| 10 |
+
|
| 11 |
+
# Function to load Google Gemini Model and provide queries as response
|
| 12 |
+
def get_gemini_response(question, prompt):
    """Send the instructions plus the user's question to Gemini.

    Args:
        question: The user's question text.
        prompt: A sequence of instruction strings; only ``prompt[0]`` is
            sent, placed ahead of the question.

    Returns:
        The response text with leading and trailing whitespace removed.
    """
    gemini = genai.GenerativeModel('gemini-pro')
    parts = [prompt[0], question]
    reply = gemini.generate_content(parts)
    answer = reply.text
    return answer.strip()
|
| 16 |
+
|
| 17 |
+
# Function to load data from CSV
|
| 18 |
+
@st.cache_data
def load_data():
    """Build the sample product inventory as a DataFrame.

    The CSV text is embedded in the function as a stand-in for reading a
    real file. After parsing, ``price`` is coerced to a numeric dtype and
    ``last_restock_date`` to datetime; values that cannot be converted
    become NaN / NaT (``errors='coerce'``).

    Returns:
        A DataFrame with the columns id, product_name, category, price,
        stock_quantity, supplier and last_restock_date.
    """
    # Sample CSV content; in practice this would be read from a file.
    csv_content = """
id,product_name,category,price,stock_quantity,supplier,last_restock_date
1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
"""
    inventory = pd.read_csv(StringIO(csv_content))

    # Column -> converter; each converter is applied with errors='coerce'.
    converters = {
        'price': pd.to_numeric,
        'last_restock_date': pd.to_datetime,
    }
    for column, convert in converters.items():
        inventory[column] = convert(inventory[column], errors='coerce')

    return inventory
|
| 38 |
+
|
| 39 |
+
# Function to execute pandas query
|
| 40 |
+
def _as_dataframe(result):
    """Coerce an evaluated query result into a DataFrame for display."""
    if isinstance(result, pd.DataFrame):
        return result
    if isinstance(result, pd.Series):
        # Keep the Series name as the column name when it has one.
        column = result.name if result.name is not None else "result"
        return result.to_frame(name=column)
    if result is None:
        # e.g. an in-place operation: nothing to show.
        return pd.DataFrame()
    if pd.api.types.is_list_like(result) and not isinstance(result, dict):
        return pd.DataFrame({"result": list(result)})
    # Scalars (counts, means, ...) and anything else become a 1x1 table.
    return pd.DataFrame({"result": [result]})


def execute_pandas_query(df, query):
    """Evaluate ``df.<query>`` and return the outcome as a DataFrame.

    Args:
        df: The DataFrame the operation is applied to.
        query: Text of the operation that follows ``df.``, for example
            ``query("category == 'Electronics'")`` or ``shape[0]``.

    Returns:
        A DataFrame in every case. DataFrame results are returned
        unchanged, a Series becomes a one-column frame, list-likes and
        scalars are wrapped in a ``result`` column, and ``None`` or any
        failure yields an empty DataFrame. Failures are reported through
        ``st.error`` rather than raised.
    """
    try:
        # SECURITY: eval() runs arbitrary Python. The query text is
        # model-generated from user input, so this is unsafe outside a
        # trusted demo. A real application should parse the request and
        # translate it into a fixed set of pandas operations instead.
        result = eval(f"df.{query}")
        # Callers rely on DataFrame attributes (.empty, .columns), so never
        # hand back a bare scalar or Series.
        return _as_dataframe(result)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return pd.DataFrame()
|
| 49 |
+
|
| 50 |
+
# Instruction text sent to Gemini ahead of the user's question. The model's
# reply is evaluated as ``df.<reply>``, so every example must be something
# that is valid after ``df.`` and that produces a DataFrame. (A DataFrame
# has no ``len`` method, so the count example uses a named aggregation.)
prompt = [
    """
You are an expert in converting English questions to pandas DataFrame operations!
The DataFrame 'df' has the following columns:
id, product_name, category, price, stock_quantity, supplier, last_restock_date.

Examples:
- How many products do we have in total?
The pandas operation will be: agg(total_products=('id', 'count'))
- What are all the products in the Electronics category?
The pandas operation will be: query("category == 'Electronics'")

The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
Reply with only the part that follows 'df.' (no 'df.' prefix, no code fences, no explanation), and make sure it evaluates to a DataFrame.
"""
]
|
| 66 |
+
|
| 67 |
+
# Streamlit App
|
| 68 |
+
st.set_page_config(page_title="Department Store Analytics", layout="wide")

# Load data
df = load_data()

# Sidebar for user input
st.sidebar.title("Department Store Query Interface")
question = st.sidebar.text_area("Enter your question:", key="input")
submit = st.sidebar.button("Ask Me")

# Main content area
st.title("Department Store Dashboard")

if submit and not (question or "").strip():
    # Do not spend a model call on a blank question.
    st.warning("Please enter a question before clicking 'Ask Me'.")
elif submit:
    with st.spinner("Generating query and fetching data..."):
        pandas_query = get_gemini_response(question, prompt)
        st.code(pandas_query, language="python")

        result_df = execute_pandas_query(df, pandas_query)

    # An evaluated expression can yield a Series, a scalar or None; the code
    # below needs DataFrame attributes (.empty, .columns), so normalise first.
    if isinstance(result_df, pd.Series):
        result_df = result_df.to_frame()
    elif result_df is None:
        result_df = pd.DataFrame()
    elif not isinstance(result_df, pd.DataFrame):
        result_df = pd.DataFrame({"result": [result_df]})

    if not result_df.empty:
        st.success("Query executed successfully!")

        # Display data in a table
        st.subheader("Data Table")
        st.dataframe(result_df)

        # Create visualizations based on the data; each chart is drawn only
        # when the columns it needs are present in the result.
        st.subheader("Data Visualizations")

        columns = result_df.columns
        has_prices = 'price' in columns and result_df['price'].notna().any()

        col1, col2 = st.columns(2)

        with col1:
            if has_prices:
                fig = px.histogram(result_df, x='price', title='Price Distribution')
                st.plotly_chart(fig, use_container_width=True)

            if 'category' in columns:
                category_counts = result_df['category'].value_counts()
                fig = px.pie(
                    values=category_counts.values,
                    names=category_counts.index,
                    title='Products by Category',
                )
                st.plotly_chart(fig, use_container_width=True)

        with col2:
            # The .dt accessor only exists on datetime columns, so skip the
            # trend chart if the query changed the column's dtype.
            if 'last_restock_date' in columns and pd.api.types.is_datetime64_any_dtype(
                result_df['last_restock_date']
            ):
                # Work on a standalone Series rather than adding a helper
                # column to result_df (which may be a slice of the data).
                restock_month = result_df['last_restock_date'].dt.to_period('M')
                restock_counts = restock_month.value_counts().sort_index()
                fig = px.line(
                    x=restock_counts.index.astype(str),
                    y=restock_counts.values,
                    title='Restocking Trend',
                )
                st.plotly_chart(fig, use_container_width=True)

            if 'product_name' in columns and has_prices:
                top_prices = result_df.sort_values('price', ascending=False).head(10)
                fig = px.bar(
                    top_prices,
                    x='product_name',
                    y='price',
                    title='Top 10 Most Expensive Products',
                )
                st.plotly_chart(fig, use_container_width=True)
    else:
        st.warning("No data returned from the query.")

else:
    st.info("Enter a question and click 'Ask Me' to get started!")

# Footer
st.sidebar.markdown("---")
st.sidebar.warning("AutomatiX - Department Store Analytics - Powered by Streamlit and Google Gemini")
|