File size: 2,149 Bytes
23c1fce
de0992d
 
4938f65
f41cea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23c1fce
 
 
 
4938f65
f41cea2
23c1fce
de0992d
4938f65
f41cea2
de0992d
 
 
 
 
 
f41cea2
 
de0992d
 
 
4938f65
23c1fce
86ec6d2
23c1fce
 
f41cea2
23c1fce
 
 
 
f41cea2
23c1fce
 
 
 
 
f41cea2
23c1fce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import pdfplumber
import pandas as pd

# Keyword table used to classify transactions based on description.
# Insertion order matters: the first category with a matching keyword wins.
_CATEGORY_KEYWORDS = {
    "Grocery": ("Walmart", "Kroger", "Whole Foods"),
    "Dining": ("McDonald's", "Starbucks", "Chipotle"),
    "Bills": ("Verizon", "AT&T", "Con Edison"),
    "Entertainment": ("Netflix", "Spotify", "Amazon Prime"),
    "Transport": ("Uber", "Lyft", "MetroCard"),
}


def classify_transaction(description):
    """Return the spending category for a transaction description.

    The description is searched for each known merchant keyword as a
    case-insensitive substring, so text such as ``"WALMART SUPERCENTER"``
    is recognised the same way as ``"Walmart"``.

    Args:
        description: Free-text description of a transaction.

    Returns:
        ``"Unknown"`` when ``description`` is not a string, the name of the
        first category with a matching keyword, or ``"Other"`` when no
        keyword matches.
    """
    if not isinstance(description, str):  # e.g. None/NaN from a missing cell
        return "Unknown"

    # casefold() normalises case on both sides of the comparison.
    normalized = description.casefold()
    for category, keywords in _CATEGORY_KEYWORDS.items():
        if any(keyword.casefold() in normalized for keyword in keywords):
            return category
    return "Other"

# Function to process the uploaded PDF and classify transactions
def process_pdf(file):
    """Extract transaction rows from a PDF statement and classify them.

    Text is pulled from every page with pdfplumber. Each line that contains
    at least one digit is treated as a candidate transaction and split on
    whitespace: the first token becomes the date, the last token the amount,
    and everything in between the description. Lines with fewer than three
    tokens cannot fill all three columns and are skipped.

    NOTE: this layout is a heuristic; adjust the parsing to match the actual
    statement format.

    Args:
        file: Whatever ``pdfplumber.open`` accepts (the UI passes the
            Streamlit upload object), or ``None``.

    Returns:
        A DataFrame with columns ``Date``, ``Description``, ``Amount`` and
        ``Category`` (possibly empty), or ``None`` when ``file`` is ``None``
        (an error is shown through ``st.error`` in that case).
    """
    if file is None:
        st.error("No file uploaded.")
        return None

    # Extract each page's text exactly once; extraction is the expensive step
    # and pages without extractable text yield nothing.
    page_texts = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    lines = "\n".join(page_texts).split("\n")

    rows = []
    for line in lines:
        if not any(char.isdigit() for char in line):
            continue  # No digits: cannot carry a date or an amount
        tokens = line.split()
        if len(tokens) < 3:
            continue  # Too short to hold date, description and amount
        # Keep the whole middle section so multi-word merchant names
        # (e.g. "Whole Foods") survive for classification.
        rows.append((tokens[0], " ".join(tokens[1:-1]), tokens[-1]))

    # Fixed-width rows keep the constructor from raising on a column mismatch.
    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])

    # Apply classification
    df["Category"] = df["Description"].apply(classify_transaction)

    return df

# Streamlit UI
# Page header and a short usage hint. The emoji below were previously stored
# as mis-decoded UTF-8 bytes and rendered as garbage characters.
st.title("📄 Credit Card Statement Classifier")
st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process and display transactions
    df_result = process_pdf(uploaded_file)

    # process_pdf returns None when it could not produce a table.
    if df_result is not None:
        st.write("### 📊 Classified Transactions:")
        st.dataframe(df_result)  # Display table