pm6six committed on
Commit
edec0a9
·
verified ·
1 Parent(s): 1482f84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -19
app.py CHANGED
@@ -2,25 +2,28 @@ import streamlit as st
2
  import pdfplumber
3
  import pandas as pd
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  # Function to classify transactions based on description
6
  def classify_transaction(description):
7
- if not isinstance(description, str): # Ensure description is a string
8
- return "Unknown"
9
-
10
- categories = {
11
- "Grocery": ["Walmart", "Kroger", "Whole Foods"],
12
- "Dining": ["McDonald's", "Starbucks", "Chipotle"],
13
- "Bills": ["Verizon", "AT&T", "Con Edison"],
14
- "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
15
- "Transport": ["Uber", "Lyft", "MetroCard"],
16
- }
17
-
18
- for category, keywords in categories.items():
19
- if any(keyword in description for keyword in keywords):
20
  return category
21
  return "Other"
22
 
23
- # Function to process the uploaded PDF and classify transactions
24
  def process_pdf(file):
25
  if file is None:
26
  st.error("No file uploaded.")
@@ -30,24 +33,30 @@ def process_pdf(file):
30
  with pdfplumber.open(file) as pdf:
31
  text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
32
 
33
- # Extract transactions (Modify based on your statement format)
34
  lines = text.split("\n")
35
  transactions = [line for line in lines if any(char.isdigit() for char in line)]
36
 
37
  # Convert to DataFrame
38
  df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
39
 
 
 
 
40
  # Ensure no missing descriptions
41
  df["Description"] = df["Description"].fillna("Unknown")
42
 
43
  # Apply classification
44
  df["Category"] = df["Description"].apply(classify_transaction)
45
 
46
- return df # Return DataFrame
 
 
 
47
 
48
  # Streamlit UI
49
  st.title("📄 Credit Card Statement Classifier")
50
- st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")
51
 
52
  uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
53
 
@@ -55,8 +64,12 @@ if uploaded_file is not None:
55
  st.success("✅ File uploaded successfully!")
56
 
57
  # Process and display transactions
58
- df_result = process_pdf(uploaded_file)
59
 
60
  if df_result is not None:
61
  st.write("### 📊 Classified Transactions:")
62
- st.dataframe(df_result) # Display table
 
 
 
 
 
2
  import pdfplumber
3
  import pandas as pd
4
 
5
+ # Define category mapping based on transaction keywords
6
+ CATEGORY_MAPPING = {
7
+ "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
8
+ "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
9
+ "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
10
+ "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
11
+ "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
12
+ "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
13
+ "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
14
+ "Shopping": ["Amazon", "Best Buy", "Target", "Walmart", "Ebay", "Retail"],
15
+ "Other": []
16
+ }
17
+
18
  # Function to classify transactions based on description
19
  def classify_transaction(description):
20
+ description = str(description).lower()
21
+ for category, keywords in CATEGORY_MAPPING.items():
22
+ if any(keyword.lower() in description for keyword in keywords):
 
 
 
 
 
 
 
 
 
 
23
  return category
24
  return "Other"
25
 
26
+ # Function to process uploaded PDF and categorize transactions
27
  def process_pdf(file):
28
  if file is None:
29
  st.error("No file uploaded.")
 
33
  with pdfplumber.open(file) as pdf:
34
  text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
35
 
36
+ # Extract transactions (Modify based on statement format)
37
  lines = text.split("\n")
38
  transactions = [line for line in lines if any(char.isdigit() for char in line)]
39
 
40
  # Convert to DataFrame
41
  df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
42
 
43
+ # Ensure amount column is numeric
44
+ df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")
45
+
46
  # Ensure no missing descriptions
47
  df["Description"] = df["Description"].fillna("Unknown")
48
 
49
  # Apply classification
50
  df["Category"] = df["Description"].apply(classify_transaction)
51
 
52
+ # Summarize total spending per category
53
+ category_summary = df.groupby("Category")["Amount"].sum().reset_index()
54
+
55
+ return df, category_summary # Return full transactions and summary
56
 
57
  # Streamlit UI
58
  st.title("📄 Credit Card Statement Classifier")
59
+ st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")
60
 
61
  uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
62
 
 
64
  st.success("✅ File uploaded successfully!")
65
 
66
  # Process and display transactions
67
+ df_result, category_summary = process_pdf(uploaded_file)
68
 
69
  if df_result is not None:
70
  st.write("### 📊 Classified Transactions:")
71
+ st.dataframe(df_result) # Display detailed transactions
72
+
73
+ st.write("### 💰 Spending Summary by Category:")
74
+ st.dataframe(category_summary) # Display spending summary
75
+