File size: 3,057 Bytes
23c1fce
de0992d
 
4938f65
edec0a9
 
 
 
 
 
 
 
 
 
 
 
 
f41cea2
 
edec0a9
 
 
f41cea2
 
 
edec0a9
23c1fce
 
 
 
4938f65
f41cea2
23c1fce
de0992d
4938f65
edec0a9
de0992d
 
 
 
 
 
edec0a9
 
 
f41cea2
 
de0992d
 
 
4938f65
edec0a9
 
 
 
86ec6d2
23c1fce
 
edec0a9
23c1fce
 
 
 
f41cea2
23c1fce
 
edec0a9
23c1fce
 
f41cea2
edec0a9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import pdfplumber
import pandas as pd

# Keyword lists used to assign a spending category to a transaction.
#
# Key order is significant: classify_transaction walks this dict in insertion
# order and returns the first category that has a matching keyword. As a
# consequence, "Walmart" under "Shopping" is shadowed by "Groceries", and
# "Gas Station" under "Transport" is shadowed by "Gas" under "Utilities".
CATEGORY_MAPPING = {
    "Groceries": [
        "Walmart",
        "Kroger",
        "Whole Foods",
        "Costco",
        "Trader Joe",
        "Safeway",
    ],
    "Dining": [
        "McDonald's",
        "Starbucks",
        "Chipotle",
        "Subway",
        "Domino",
        "Pizza",
        "Burger",
        "Restaurant",
    ],
    "Utilities": [
        "Verizon",
        "AT&T",
        "T-Mobile",
        "Sprint",
        "Comcast",
        "Xfinity",
        "Con Edison",
        "Electric",
        "Water",
        "Gas",
    ],
    "Rent": [
        "Apartment",
        "Rent",
        "Landlord",
        "Lease",
    ],
    "Entertainment": [
        "Netflix",
        "Spotify",
        "Amazon Prime",
        "Hulu",
        "Disney",
        "Cinema",
    ],
    "Transport": [
        "Uber",
        "Lyft",
        "MetroCard",
        "Gas Station",
        "Shell",
        "Chevron",
    ],
    "Healthcare": [
        "Pharmacy",
        "CVS",
        "Walgreens",
        "Doctor",
        "Hospital",
        "Dental",
    ],
    "Shopping": [
        "Amazon",
        "Best Buy",
        "Target",
        "Walmart",
        "Ebay",
        "Retail",
    ],
    # Fallback bucket; it has no keywords of its own.
    "Other": [],
}

# Map a transaction description to a spending category
def classify_transaction(description):
    """Return the first category with a keyword contained in *description*.

    The description is converted with ``str()`` and lower-cased, and every
    keyword is lower-cased before comparison, so matching is a
    case-insensitive substring test. Categories are tried in the iteration
    order of ``CATEGORY_MAPPING``; ``"Other"`` is returned when no keyword
    matches.
    """
    haystack = str(description).lower()
    for label, needles in CATEGORY_MAPPING.items():
        for needle in needles:
            if needle.lower() in haystack:
                # First hit wins; later categories are never consulted.
                return label
    return "Other"

# Helper: turn one statement line into a [date, description, amount] row
def _parse_transaction_line(line):
    """Split *line* into ``[date, description, amount]``, or return ``None``.

    Assumes the first whitespace-separated token is the date and the last one
    is the amount, with everything in between forming the description
    (modify based on statement format). Lines with fewer than three tokens
    cannot hold all three fields and yield ``None``.
    """
    tokens = line.split()
    if len(tokens) < 3:
        return None
    # Drop currency formatting so values such as "$1,234.56" stay numeric.
    amount = tokens[-1].replace("$", "").replace(",", "")
    return [tokens[0], " ".join(tokens[1:-1]), amount]


# Function to process uploaded PDF and categorize transactions
def process_pdf(file):
    """Extract transactions from a PDF statement and categorize them.

    Args:
        file: A path or file-like object accepted by ``pdfplumber.open``,
            or ``None``.

    Returns:
        A ``(transactions, category_summary)`` tuple. ``transactions`` is a
        DataFrame with columns ``Date``, ``Description``, ``Amount`` and
        ``Category``; ``category_summary`` holds the summed ``Amount`` per
        ``Category``. When *file* is ``None`` an error is shown through
        Streamlit and ``(None, None)`` is returned, so callers that unpack
        two values keep working.
    """
    if file is None:
        st.error("No file uploaded.")
        return None, None

    # Extract text from PDF, calling extract_text() only once per page
    # and skipping pages that yield no text.
    page_texts = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    text = "\n".join(page_texts)

    # Extract transactions (Modify based on statement format): only lines
    # containing a digit are candidates, and only those that split into
    # date / description / amount are kept.
    rows = []
    for line in text.split("\n"):
        if not any(char.isdigit() for char in line):
            continue
        row = _parse_transaction_line(line)
        if row is not None:
            rows.append(row)

    # Convert to DataFrame
    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])

    # Ensure amount column is numeric; unparseable values become NaN.
    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")

    # Description is always a non-empty string at this point, so no
    # missing-value fill is required before classification.
    df["Category"] = df["Description"].apply(classify_transaction)

    # Summarize total spending per category
    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

    return df, category_summary  # Return full transactions and summary

# Streamlit UI: upload a PDF statement, then show the classified
# transactions and the per-category spending summary.
st.title("📄 Credit Card Statement Classifier")
st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process and display transactions
    df_result, category_summary = process_pdf(uploaded_file)

    # Only render the tables when processing produced a result.
    if df_result is not None:
        st.write("### 📊 Classified Transactions:")
        st.dataframe(df_result)  # Display detailed transactions

        st.write("### 💰 Spending Summary by Category:")
        st.dataframe(category_summary)  # Display spending summary