"""Streamlit app: extract transactions from a PDF statement and categorize them."""

from typing import Optional

import pandas as pd
import pdfplumber
import streamlit as st

# Merchant keywords for each spending category. Categories are checked in
# this order and the first one with a matching keyword wins.
CATEGORY_KEYWORDS = {
    "Grocery": ["Walmart", "Kroger", "Whole Foods"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle"],
    "Bills": ["Verizon", "AT&T", "Con Edison"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
    "Transport": ["Uber", "Lyft", "MetroCard"],
}

# Column layout of the DataFrame returned by ``process_pdf``.
TRANSACTION_COLUMNS = ["Date", "Description", "Amount"]


def classify_transaction(description) -> str:
    """Return the spending category for a transaction description.

    Args:
        description: Free-text description taken from the statement.

    Returns:
        ``"Unknown"`` when *description* is not a string, the first category
        in ``CATEGORY_KEYWORDS`` with a keyword contained in the description,
        or ``"Other"`` when no keyword matches.
    """
    if not isinstance(description, str):
        return "Unknown"

    # Compare case-insensitively so that "STARBUCKS STORE 123" still matches
    # the keyword "Starbucks".
    haystack = description.casefold()
    for category, keywords in CATEGORY_KEYWORDS.items():
        if any(keyword.casefold() in haystack for keyword in keywords):
            return category
    return "Other"


def _parse_transaction_line(line: str) -> Optional[list]:
    """Split one statement line into ``[date, description, amount]``.

    A line is treated as a transaction candidate when it contains at least
    one digit and at least three whitespace-separated tokens. The first token
    becomes the date, the last token the amount, and everything in between
    the description, so multi-word merchant names stay intact.

    NOTE(review): this assumes a "<date> <description...> <amount>" layout;
    adjust for statement formats that differ.

    Returns:
        The three fields as a list, or ``None`` when the line does not
        qualify.
    """
    if not any(char.isdigit() for char in line):
        return None
    tokens = line.split()
    if len(tokens) < 3:
        # Too short to hold a date, a description and an amount.
        return None
    return [tokens[0], " ".join(tokens[1:-1]), tokens[-1]]


def process_pdf(file) -> Optional[pd.DataFrame]:
    """Extract and classify transactions from an uploaded PDF statement.

    Args:
        file: The PDF to read, passed straight to ``pdfplumber.open``.
            ``None`` is reported through ``st.error``.

    Returns:
        A DataFrame with the columns ``Date``, ``Description``, ``Amount``
        and ``Category`` (empty when no line qualifies), or ``None`` when
        *file* is ``None``.
    """
    if file is None:
        st.error("No file uploaded.")
        return None

    # Extract each page's text exactly once; pages for which extract_text()
    # returns nothing are skipped.
    page_texts = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)

    rows = []
    for page_text in page_texts:
        for line in page_text.split("\n"):
            row = _parse_transaction_line(line)
            if row is not None:
                rows.append(row)

    # Every parsed row has exactly three fields, so no cell is missing and
    # the constructor cannot fail on a column-count mismatch.
    df = pd.DataFrame(rows, columns=TRANSACTION_COLUMNS)
    df["Category"] = df["Description"].apply(classify_transaction)
    return df


# Streamlit UI
st.title("📄 Credit Card Statement Classifier")
st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process and display transactions
    df_result = process_pdf(uploaded_file)
    if df_result is not None:
        if df_result.empty:
            # Tell the user why nothing is listed instead of showing an
            # empty table.
            st.warning("No transactions could be extracted from this PDF.")
        else:
            st.write("### 📊 Classified Transactions:")
            st.dataframe(df_result)