Update app.py
Browse files
app.py
CHANGED
@@ -2,25 +2,28 @@ import streamlit as st
|
|
2 |
import pdfplumber
|
3 |
import pandas as pd
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# Function to classify transactions based on description
|
6 |
def classify_transaction(description):
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
categories = {
|
11 |
-
"Grocery": ["Walmart", "Kroger", "Whole Foods"],
|
12 |
-
"Dining": ["McDonald's", "Starbucks", "Chipotle"],
|
13 |
-
"Bills": ["Verizon", "AT&T", "Con Edison"],
|
14 |
-
"Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
|
15 |
-
"Transport": ["Uber", "Lyft", "MetroCard"],
|
16 |
-
}
|
17 |
-
|
18 |
-
for category, keywords in categories.items():
|
19 |
-
if any(keyword in description for keyword in keywords):
|
20 |
return category
|
21 |
return "Other"
|
22 |
|
23 |
-
# Function to process
|
24 |
def process_pdf(file):
|
25 |
if file is None:
|
26 |
st.error("No file uploaded.")
|
@@ -30,24 +33,30 @@ def process_pdf(file):
|
|
30 |
with pdfplumber.open(file) as pdf:
|
31 |
text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
|
32 |
|
33 |
-
# Extract transactions (Modify based on
|
34 |
lines = text.split("\n")
|
35 |
transactions = [line for line in lines if any(char.isdigit() for char in line)]
|
36 |
|
37 |
# Convert to DataFrame
|
38 |
df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
|
39 |
|
|
|
|
|
|
|
40 |
# Ensure no missing descriptions
|
41 |
df["Description"] = df["Description"].fillna("Unknown")
|
42 |
|
43 |
# Apply classification
|
44 |
df["Category"] = df["Description"].apply(classify_transaction)
|
45 |
|
46 |
-
|
|
|
|
|
|
|
47 |
|
48 |
# Streamlit UI
|
49 |
st.title("π Credit Card Statement Classifier")
|
50 |
-
st.write("Upload a **PDF bank/credit card statement
|
51 |
|
52 |
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
|
53 |
|
@@ -55,8 +64,12 @@ if uploaded_file is not None:
|
|
55 |
st.success("β
File uploaded successfully!")
|
56 |
|
57 |
# Process and display transactions
|
58 |
-
df_result = process_pdf(uploaded_file)
|
59 |
|
60 |
if df_result is not None:
|
61 |
st.write("### π Classified Transactions:")
|
62 |
-
st.dataframe(df_result) # Display
|
|
|
|
|
|
|
|
|
|
2 |
import pdfplumber
|
3 |
import pandas as pd
|
4 |
|
5 |
+
# Category -> keywords that identify that category inside a transaction
# description. Keywords are matched as case-insensitive substrings by
# classify_transaction() below.
CATEGORY_MAPPING = {
    "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
    "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
    "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
    "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
    "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
    # NOTE: "Walmart" is also listed under "Groceries"; equal-length matches
    # resolve to the category listed first, so Walmart stays "Groceries".
    "Shopping": ["Amazon", "Best Buy", "Target", "Walmart", "Ebay", "Retail"],
    "Other": [],
}


# Function to classify transactions based on description
def classify_transaction(description):
    """Return the spending category for a transaction description.

    The description is converted with ``str()`` and lower-cased, then every
    keyword in ``CATEGORY_MAPPING`` is tested as a case-insensitive substring.
    The category owning the *longest* matching keyword wins, so a specific
    keyword such as "Gas Station" (Transport) is not shadowed by a shorter
    one such as "Gas" (Utilities) that merely appears earlier in the mapping.
    When two matching keywords have the same length, the category listed
    first in ``CATEGORY_MAPPING`` is kept.

    Args:
        description: Transaction description; any object is accepted and is
            stringified before matching.

    Returns:
        The matching category name, or "Other" when no keyword matches.
    """
    text = str(description).lower()
    best_category = "Other"
    best_length = 0
    for category, keywords in CATEGORY_MAPPING.items():
        for keyword in keywords:
            needle = keyword.lower()
            # Strict ">" keeps the earlier category on ties and ignores
            # empty keywords, which would otherwise match every description.
            if len(needle) > best_length and needle in text:
                best_category = category
                best_length = len(needle)
    return best_category
|
25 |
|
26 |
+
# Function to process uploaded PDF and categorize transactions
|
27 |
def process_pdf(file):
|
28 |
if file is None:
|
29 |
st.error("No file uploaded.")
|
|
|
33 |
with pdfplumber.open(file) as pdf:
|
34 |
text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
|
35 |
|
36 |
+
# Extract transactions (Modify based on statement format)
|
37 |
lines = text.split("\n")
|
38 |
transactions = [line for line in lines if any(char.isdigit() for char in line)]
|
39 |
|
40 |
# Convert to DataFrame
|
41 |
df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
|
42 |
|
43 |
+
# Ensure amount column is numeric
|
44 |
+
df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")
|
45 |
+
|
46 |
# Ensure no missing descriptions
|
47 |
df["Description"] = df["Description"].fillna("Unknown")
|
48 |
|
49 |
# Apply classification
|
50 |
df["Category"] = df["Description"].apply(classify_transaction)
|
51 |
|
52 |
+
# Summarize total spending per category
|
53 |
+
category_summary = df.groupby("Category")["Amount"].sum().reset_index()
|
54 |
+
|
55 |
+
return df, category_summary # Return full transactions and summary
|
56 |
|
57 |
# Streamlit UI
|
58 |
st.title("π Credit Card Statement Classifier")
|
59 |
+
st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")
|
60 |
|
61 |
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
|
62 |
|
|
|
64 |
st.success("β
File uploaded successfully!")
|
65 |
|
66 |
# Process and display transactions
|
67 |
+
df_result, category_summary = process_pdf(uploaded_file)
|
68 |
|
69 |
if df_result is not None:
|
70 |
st.write("### π Classified Transactions:")
|
71 |
+
st.dataframe(df_result) # Display detailed transactions
|
72 |
+
|
73 |
+
st.write("### π° Spending Summary by Category:")
|
74 |
+
st.dataframe(category_summary) # Display spending summary
|
75 |
+
|