File size: 4,209 Bytes
f10ec56
 
2c359f1
c7d0bb8
 
f10ec56
c7d0bb8
 
2c359f1
a6ee9ca
c7d0bb8
a6ee9ca
6ca4f9e
11d5829
 
a6ee9ca
11d5829
 
 
 
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
631a831
 
 
 
 
 
 
 
 
 
 
77dc4ed
 
631a831
 
77dc4ed
631a831
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Upload CSV file containing transaction data. The rest of the script reads
# the 'Description', 'Amount' and 'Date' columns from it.
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Load the file into a DataFrame. An empty or malformed upload is reported
    # as a normal error message; st.stop() then ends this script run so nothing
    # below executes without a DataFrame.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Error: The uploaded file could not be read as CSV ({exc}).")
        st.stop()

    # Debug: Display the column names found in the uploaded file
    st.write("Columns in the uploaded file:", df.columns)

    # Check every column the analysis depends on up front, so a missing
    # 'Amount' or 'Date' is reported here instead of failing later with a
    # KeyError halfway through the page.
    required_columns = ['Description', 'Amount', 'Date']
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        st.error(
            "Error: The CSV file does not contain the required column(s): "
            + ", ".join(f"'{name}'" for name in missing_columns)
            + "."
        )
    else:
        # Initialize Hugging Face's zero-shot text classification model.
        # The zero-shot pipeline scores every candidate label as an NLI
        # hypothesis, so it needs a checkpoint fine-tuned on an NLI task. A bare
        # encoder such as 'distilbert-base-uncased' has no trained
        # classification head, which makes the predicted labels arbitrary.
        model_name = 'facebook/bart-large-mnli'
        classifier = pipeline('zero-shot-classification', model=model_name)

        # List of possible expense categories
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        # Label assigned to rows whose description is missing or blank
        uncategorized_label = "Uncategorized"

        def categorize_expense(description):
            """Return the most probable category for one transaction description.

            Args:
                description: Raw value taken from the 'Description' column.

            Returns:
                The top-ranked entry of ``categories``, or
                ``uncategorized_label`` when the description is missing or
                contains only whitespace (the classifier is not called then).
            """
            if pd.isna(description) or not str(description).strip():
                return uncategorized_label
            result = classifier(str(description), candidate_labels=categories)
            return result['labels'][0]  # Labels come back ordered by likelihood

        # Classify each distinct description once and reuse the answer for
        # repeated rows, which avoids redundant model calls.
        label_by_description = {
            text: categorize_expense(text)
            for text in df['Description'].dropna().unique()
        }
        # Missing descriptions have no dictionary entry and map to NaN; give
        # them the explicit fallback label.
        df['Category'] = df['Description'].map(label_by_description).fillna(uncategorized_label)

        # Show the categorized data
        st.write("Categorized Data:", df.head())

        # Visualization 1: Pie Chart of Spending by Category
        category_expenses = df.groupby('Category')['Amount'].sum()

        # A pie chart cannot represent negative totals (matplotlib raises on
        # them) and a zero total has no area, so only categories with a
        # positive total are drawn.
        positive_expenses = category_expenses[category_expenses > 0]

        if positive_expenses.empty:
            st.info("No positive category totals to show in the pie chart.")
        else:
            # Plot pie chart for expense distribution by category
            fig1, ax1 = plt.subplots(figsize=(8, 8))
            try:
                positive_expenses.plot(
                    kind='pie',
                    autopct='%1.1f%%',
                    startangle=90,
                    colors=plt.cm.Paired.colors,
                    ax=ax1,
                )
                ax1.set_title('Expense Distribution by Category')
                ax1.set_ylabel('')  # Hide the y-axis label
                st.pyplot(fig1)
            finally:
                # pyplot keeps every figure it creates registered until it is
                # closed; without this each rerun of the script adds another.
                plt.close(fig1)

        # Visualization 2: Monthly Spending Trends (Line Chart)
        # Parse 'Date'; values that are missing or cannot be parsed become NaT
        # instead of aborting the whole page.
        parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
        unparsed_count = int(parsed_dates.isna().sum())
        if unparsed_count:
            st.warning(
                f"{unparsed_count} row(s) have a missing or unparseable 'Date' "
                "and are left out of the monthly totals."
            )

        # Keep only the date, no time
        df['Date'] = parsed_dates.dt.date

        # Year-Month as a plain string: it sorts chronologically and serializes
        # cleanly for plotting. Rows without a valid date get NaN here and are
        # skipped by the groupby below.
        df['Month'] = parsed_dates.dt.strftime('%Y-%m')

        # Group by month and calculate the total amount spent per month
        monthly_expenses = df.groupby('Month')['Amount'].sum()

        # Plot monthly spending trends as a line chart, using named columns so
        # the axis labels do not depend on positional "x"/"y" defaults.
        monthly_totals = monthly_expenses.reset_index()
        fig2 = px.line(
            monthly_totals,
            x='Month',
            y='Amount',
            title="Monthly Expenses",
            labels={"Amount": "Amount ($)"},
        )
        st.plotly_chart(fig2)

        # Budget and Alerts Example (tracking whether a category exceeds its budget)
        # The limits are treated as monthly, matching the monthly
        # spending-vs-budget comparison drawn from the same dictionary.
        # Categories without an entry have no budget and are never flagged.
        budgets = {
            "Groceries": 300,
            "Rent": 1000,
            "Utilities": 150,
            "Entertainment": 100,
            "Dining": 150,
            "Transportation": 120,
        }

        # Compare each category's total for a month against its budget.
        # Checking single transactions would miss a category that goes over
        # budget through many small purchases.
        category_totals = df.groupby(['Month', 'Category'], as_index=False)['Amount'].sum()
        category_totals['Budget'] = category_totals['Category'].map(budgets)

        # A comparison against a missing (NaN) budget is False, so categories
        # without a budget drop out here.
        exceeded_budget = category_totals[category_totals['Amount'] > category_totals['Budget']]

        # Flag the individual transactions that belong to an over-budget
        # month/category pair.
        exceeded_keys = set(zip(exceeded_budget['Month'], exceeded_budget['Category']))
        df['Budget_Exceeded'] = [
            key in exceeded_keys for key in zip(df['Month'], df['Category'])
        ]

        # Show which categories exceeded their budgets, and in which month
        st.write(
            "Categories that exceeded the budget:",
            exceeded_budget[['Month', 'Category', 'Amount', 'Budget']],
        )

        # Visualization 3: Monthly Spending vs Budget (Bar Chart)
        # Every month is compared against the same overall budget: the sum of
        # all per-category budgets.
        total_budget = sum(budgets.values())
        monthly_expenses_df = monthly_expenses.to_frame(name='Actual')
        monthly_expenses_df['Budget'] = total_budget

        if monthly_expenses_df.empty:
            # Nothing to draw; pandas cannot plot an empty table.
            st.info("No monthly totals available for the spending vs budget chart.")
        else:
            # Create a figure explicitly for the bar chart
            fig3, ax3 = plt.subplots(figsize=(10, 6))
            try:
                monthly_expenses_df.plot(kind='bar', ax=ax3)  # Draw onto our axes
                ax3.set_title('Monthly Spending vs Budget')
                ax3.set_ylabel('Amount ($)')

                # Display the plot with Streamlit
                st.pyplot(fig3)
            finally:
                # pyplot keeps every figure it creates registered until it is
                # closed; without this each rerun of the script adds another.
                plt.close(fig3)