"""Streamlit expense dashboard.

Upload a CSV of transactions (columns: Description, Amount, Date), label each
transaction with a zero-shot text classifier, and display spending by
category, spending per month, and a comparison against adjustable budgets.
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Zero-shot classification is built on natural-language inference, so the
# checkpoint must be NLI-fine-tuned. A plain 'distilbert-base-uncased'
# checkpoint has no trained NLI head and yields essentially arbitrary labels.
MODEL_NAME = 'facebook/bart-large-mnli'

# Columns the dashboard reads from the uploaded CSV.
REQUIRED_COLUMNS = ('Description', 'Amount', 'Date')

# Candidate labels offered to the classifier.
CATEGORIES = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining",
              "Transportation", "Salary"]

# Label given to rows whose description is missing or blank; the classifier
# rejects empty input, so those rows are never sent to the model.
UNCATEGORIZED = "Uncategorized"

# Starting value of each monthly budget slider, per category.
DEFAULT_BUDGETS = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}


@st.cache_resource
def load_classifier():
    """Build the zero-shot pipeline once and reuse it across script reruns."""
    return pipeline('zero-shot-classification', model=MODEL_NAME)


@st.cache_data(max_entries=10000)
def categorize_expense(description):
    """Return the most probable category for one transaction description.

    Results are cached per description, so widget-triggered reruns (for
    example moving a budget slider) do not call the model again.
    """
    result = load_classifier()(description, candidate_labels=CATEGORIES)
    return result['labels'][0]  # Labels come back sorted by score


def _categorize_column(descriptions):
    """Map a Series of descriptions to a Series of category labels.

    Each distinct non-blank description is classified once; missing or blank
    descriptions are labelled UNCATEGORIZED.
    """
    cleaned = descriptions.fillna('').astype(str).str.strip()
    labels = {text: categorize_expense(text) for text in cleaned.unique() if text}
    return cleaned.map(labels).fillna(UNCATEGORIZED)


def _prepare_transactions(df):
    """Return a cleaned copy of ``df`` with usable Amount, Date and Month.

    Rows whose Amount is not numeric or whose Date cannot be parsed are
    dropped (with a warning shown to the user) instead of crashing the app.
    """
    amounts = pd.to_numeric(df['Amount'], errors='coerce')
    dates = pd.to_datetime(df['Date'], errors='coerce')
    usable = amounts.notna() & dates.notna()
    if not usable.all():
        st.warning(
            f"Skipped {int((~usable).sum())} row(s) with an invalid Amount or Date."
        )

    cleaned = df.loc[usable].copy()
    cleaned['Amount'] = amounts.loc[usable]
    # Year-month as a plain string so it groups and serializes cleanly.
    cleaned['Month'] = dates.loc[usable].dt.strftime('%Y-%m')
    # Keep only the calendar date for display, without the time part.
    cleaned['Date'] = dates.loc[usable].dt.date
    return cleaned


def main():
    """Render the upload widget and, once a file is provided, the dashboard."""
    # Upload CSV file containing transaction data
    uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)

    # Debug: display the column names found in the upload
    st.write("Columns in the uploaded file:", df.columns)

    # Every later step needs all of these columns, so check them up front.
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        st.error(
            "Error: The CSV file is missing required column(s): "
            + ", ".join(f"'{col}'" for col in missing)
        )
        return

    df = _prepare_transactions(df)
    if df.empty:
        st.error("Error: The CSV file contains no rows with a valid Amount and Date.")
        return

    # Classify transaction descriptions into categories
    df['Category'] = _categorize_column(df['Description'])

    # Show the categorized data
    st.write("Categorized Data:", df.head())

    # Visualization 1: Pie chart of spending by category
    category_expenses = df.groupby('Category')['Amount'].sum()
    fig1, ax1 = plt.subplots(figsize=(8, 8))
    category_expenses.plot(kind='pie', autopct='%1.1f%%', startangle=90,
                           colors=plt.cm.Paired.colors, ax=ax1)
    ax1.set_title('Expense Distribution by Category')
    ax1.set_ylabel('')  # Hide the y-axis label
    st.pyplot(fig1)
    plt.close(fig1)  # Release the figure; otherwise they pile up across reruns

    # Visualization 2: Monthly spending trends (line chart)
    monthly_expenses = df.groupby('Month')['Amount'].sum()
    fig2 = px.line(
        monthly_expenses.reset_index(),
        x='Month',
        y='Amount',
        title="Monthly Expenses",
        labels={"Month": "Month", "Amount": "Amount ($)"},
    )
    st.plotly_chart(fig2)

    # Sliders for adjusting the monthly budget
    st.write("Adjust your monthly budget for each category:")
    budgets = {
        category: st.slider(f"Budget for {category} ($)", min_value=0,
                            max_value=2000, value=default, step=50)
        for category, default in DEFAULT_BUDGETS.items()
    }

    # Budgets are monthly, so compare each category's total per month (not
    # individual transactions) with its budget. Categories without a budget
    # map to NaN, which never compares greater, so they are not flagged.
    spent = df.groupby(['Month', 'Category'], as_index=False)['Amount'].sum()
    spent['Budget'] = spent['Category'].map(budgets)
    exceeded_budget = spent[spent['Amount'] > spent['Budget']]
    st.write("Categories that exceeded the budget:",
             exceeded_budget[['Month', 'Category', 'Amount', 'Budget']])

    # Visualization 3: Monthly spending vs budget (bar chart)
    monthly_expenses_df = monthly_expenses.to_frame('Actual')
    # Same total budget for every month, for simplicity
    monthly_expenses_df['Budget'] = sum(budgets.values())
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    monthly_expenses_df.plot(kind='bar', ax=ax3)
    ax3.set_title('Monthly Spending vs Budget')
    ax3.set_ylabel('Amount ($)')
    st.pyplot(fig3)
    plt.close(fig3)


# `streamlit run` executes the script with __name__ set to "__main__".
if __name__ == "__main__":
    main()