File size: 3,057 Bytes
23c1fce de0992d 4938f65 edec0a9 f41cea2 edec0a9 f41cea2 edec0a9 23c1fce 4938f65 f41cea2 23c1fce de0992d 4938f65 edec0a9 de0992d edec0a9 f41cea2 de0992d 4938f65 edec0a9 86ec6d2 23c1fce edec0a9 23c1fce f41cea2 23c1fce edec0a9 23c1fce f41cea2 edec0a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import streamlit as st
import pdfplumber
import pandas as pd
# Keyword table used to assign a spending category to a transaction.
# Each key is a category name; each value lists the merchant/keyword strings
# that classify_transaction looks for inside a transaction description.
# "Other" has no keywords of its own.
# NOTE(review): "Walmart" is listed under both "Groceries" and "Shopping" --
# confirm which category is intended.
CATEGORY_MAPPING = {
    "Groceries": [
        "Walmart", "Kroger", "Whole Foods",
        "Costco", "Trader Joe", "Safeway",
    ],
    "Dining": [
        "McDonald's", "Starbucks", "Chipotle", "Subway",
        "Domino", "Pizza", "Burger", "Restaurant",
    ],
    "Utilities": [
        "Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast",
        "Xfinity", "Con Edison", "Electric", "Water", "Gas",
    ],
    "Rent": [
        "Apartment", "Rent", "Landlord", "Lease",
    ],
    "Entertainment": [
        "Netflix", "Spotify", "Amazon Prime",
        "Hulu", "Disney", "Cinema",
    ],
    "Transport": [
        "Uber", "Lyft", "MetroCard",
        "Gas Station", "Shell", "Chevron",
    ],
    "Healthcare": [
        "Pharmacy", "CVS", "Walgreens",
        "Doctor", "Hospital", "Dental",
    ],
    "Shopping": [
        "Amazon", "Best Buy", "Target",
        "Walmart", "Ebay", "Retail",
    ],
    "Other": [],
}
# Function to classify transactions based on description
def classify_transaction(description, category_mapping=None):
    """Return the spending category whose keyword best matches *description*.

    Keywords are matched case-insensitively as substrings of the description.
    When several keywords match, the longest one wins, so a specific keyword
    such as "Gas Station" is not shadowed by a shorter one such as "Gas"
    that belongs to a category listed earlier. Keywords of equal length are
    resolved in favor of the category that appears first in the mapping.

    Args:
        description: Transaction description; converted with ``str()`` so
            non-string values (e.g. ``None``) are accepted.
        category_mapping: Optional ``{category: [keywords]}`` mapping.
            Defaults to the module-level ``CATEGORY_MAPPING``.

    Returns:
        The matching category name, or ``"Other"`` when no keyword matches.
    """
    if category_mapping is None:
        category_mapping = CATEGORY_MAPPING

    text = str(description).lower()
    best_category = "Other"
    best_length = 0
    for category, keywords in category_mapping.items():
        for keyword in keywords:
            # Strict ">" keeps the earliest category on ties and ignores
            # empty keywords, which would otherwise match every description.
            if len(keyword) > best_length and keyword.lower() in text:
                best_category = category
                best_length = len(keyword)
    return best_category
def _split_transaction_line(line):
    """Split one statement line into ``[date, description, amount]`` fields."""
    tokens = line.split()
    if len(tokens) < 3:
        # Too short to hold all three fields: keep the tokens in order and
        # pad with None so every row has exactly three columns.
        return tokens + [None] * (3 - len(tokens))
    # Assumes the date is the first token and the amount the last one, with
    # everything in between forming the (possibly multi-word) description.
    # Currency symbols and thousands separators are stripped so values such
    # as "$1,234.56" can be parsed as numbers.
    amount = tokens[-1].replace("$", "").replace(",", "")
    return [tokens[0], " ".join(tokens[1:-1]), amount]


# Function to process uploaded PDF and categorize transactions
def process_pdf(file):
    """Extract transactions from a PDF statement and categorize them.

    Every text line that contains at least one digit is treated as a
    transaction candidate and split into date, description and amount.
    Amounts that cannot be parsed as numbers become NaN.

    Args:
        file: Object passed to ``pdfplumber.open`` (e.g. the value returned
            by the Streamlit file uploader), or ``None``.

    Returns:
        A ``(transactions, category_summary)`` tuple. ``transactions`` is a
        DataFrame with columns "Date", "Description", "Amount" and
        "Category"; ``category_summary`` has columns "Category" and
        "Amount" holding the summed amount per category. When ``file`` is
        ``None`` an error is shown and ``(None, None)`` is returned so that
        callers unpacking the result do not crash.
    """
    if file is None:
        st.error("No file uploaded.")
        return None, None

    # Extract text from PDF, calling extract_text() only once per page
    with pdfplumber.open(file) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    # Extract transactions (Modify based on statement format)
    candidates = [
        line for line in text.split("\n")
        if any(char.isdigit() for char in line)
    ]
    rows = [_split_transaction_line(line) for line in candidates]

    # Convert to DataFrame; every row has exactly three fields, so this
    # also works when there are no rows or only short lines.
    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])

    # Ensure amount column is numeric
    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")

    # Ensure no missing descriptions
    df["Description"] = df["Description"].fillna("Unknown")

    # Apply classification
    df["Category"] = df["Description"].apply(classify_transaction)

    # Summarize total spending per category
    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

    return df, category_summary  # Return full transactions and summary
# Streamlit UI: upload a PDF statement, then show the classified
# transactions and the per-category spending summary.
# NOTE(review): the leading "π" glyphs in the title and the first heading
# look like mis-decoded emoji whose original cannot be determined from this
# file -- confirm and restore the intended characters.
st.title("π Credit Card Statement Classifier")
st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process and display transactions. process_pdf may signal failure with
    # None, so guard before unpacking the (transactions, summary) pair.
    result = process_pdf(uploaded_file)
    df_result, category_summary = result if result is not None else (None, None)

    if df_result is not None:
        st.write("### π Classified Transactions:")
        st.dataframe(df_result)  # Display detailed transactions

        st.write("### 💰 Spending Summary by Category:")
        st.dataframe(category_summary)  # Display spending summary
|