Spaces:

pm6six
/

testing

Sleeping

App Files Files Community

pm6six committed on Feb 9

Commit

edec0a9

verified ·

1 Parent(s): 1482f84

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -19

app.py CHANGED Viewed

@@ -2,25 +2,28 @@ import streamlit as st
 import pdfplumber
 import pandas as pd
 # Function to classify transactions based on description
 def classify_transaction(description):
-    if not isinstance(description, str):  # Ensure description is a string
-        return "Unknown"
-    categories = {
-        "Grocery": ["Walmart", "Kroger", "Whole Foods"],
-        "Dining": ["McDonald's", "Starbucks", "Chipotle"],
-        "Bills": ["Verizon", "AT&T", "Con Edison"],
-        "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
-        "Transport": ["Uber", "Lyft", "MetroCard"],
-    }
-    for category, keywords in categories.items():
-        if any(keyword in description for keyword in keywords):
             return category
     return "Other"
-# Function to process the uploaded PDF and classify transactions
 def process_pdf(file):
     if file is None:
         st.error("No file uploaded.")
@@ -30,24 +33,30 @@ def process_pdf(file):
     with pdfplumber.open(file) as pdf:
         text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
-    # Extract transactions (Modify based on your statement format)
     lines = text.split("\n")
     transactions = [line for line in lines if any(char.isdigit() for char in line)]
     # Convert to DataFrame
     df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
     # Ensure no missing descriptions
     df["Description"] = df["Description"].fillna("Unknown")
     # Apply classification
     df["Category"] = df["Description"].apply(classify_transaction)
-    return df  # Return DataFrame
 # Streamlit UI
 st.title("📄 Credit Card Statement Classifier")
-st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")
 uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
@@ -55,8 +64,12 @@ if uploaded_file is not None:
     st.success("✅ File uploaded successfully!")
     # Process and display transactions
-    df_result = process_pdf(uploaded_file)
     if df_result is not None:
         st.write("### 📊 Classified Transactions:")
-        st.dataframe(df_result)  # Display table

 import pdfplumber
 import pandas as pd
# Define category mapping based on transaction keywords.
# Keys are category labels; values are case-insensitive substrings to look
# for in a transaction description.
CATEGORY_MAPPING = {
    "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
    "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
    "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
    "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
    "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
    # NOTE: "Walmart" is also listed under "Groceries"; ties go to the
    # earlier category, so a Walmart purchase is reported as "Groceries".
    "Shopping": ["Amazon", "Best Buy", "Target", "Walmart", "Ebay", "Retail"],
    "Other": []
}


# Function to classify transactions based on description
def classify_transaction(description):
    """Return the category whose keyword best matches *description*.

    The description is converted with ``str()`` and compared
    case-insensitively against every keyword in ``CATEGORY_MAPPING``.
    When several keywords match, the longest one wins so that a specific
    keyword ("Gas Station", "Amazon Prime") is not shadowed by a shorter
    one that happens to sit in an earlier category ("Gas", "Amazon").
    Keywords of equal length resolve to the category listed first.

    Args:
        description: Transaction description; any object is accepted and
            stringified (so ``None`` or a number simply matches nothing).

    Returns:
        The matching category name, or ``"Other"`` when no keyword matches.
    """
    text = str(description).lower()
    best_category = "Other"
    best_length = 0  # Empty keywords can never beat this, so they are ignored.
    for category, keywords in CATEGORY_MAPPING.items():
        for keyword in keywords:
            # Strictly-greater keeps the first category on equal-length ties.
            if len(keyword) > best_length and keyword.lower() in text:
                best_category = category
                best_length = len(keyword)
    return best_category
+# Function to process uploaded PDF and categorize transactions
 def process_pdf(file):
     if file is None:
         st.error("No file uploaded.")
     with pdfplumber.open(file) as pdf:
         text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
+    # Extract transactions (Modify based on statement format)
     lines = text.split("\n")
     transactions = [line for line in lines if any(char.isdigit() for char in line)]
     # Convert to DataFrame
     df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
+    # Ensure amount column is numeric
+    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")
     # Ensure no missing descriptions
     df["Description"] = df["Description"].fillna("Unknown")
     # Apply classification
     df["Category"] = df["Description"].apply(classify_transaction)
+    # Summarize total spending per category
+    category_summary = df.groupby("Category")["Amount"].sum().reset_index()
+    return df, category_summary  # Return full transactions and summary
 # Streamlit UI
 st.title("📄 Credit Card Statement Classifier")
+st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")
 uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
     st.success("✅ File uploaded successfully!")
     # Process and display transactions
+    df_result, category_summary = process_pdf(uploaded_file)
     if df_result is not None:
         st.write("### 📊 Classified Transactions:")
+        st.dataframe(df_result)  # Display detailed transactions
+        st.write("### 💰 Spending Summary by Category:")
+        st.dataframe(category_summary)  # Display spending summary