Spaces:

pm6six
/

testing

Sleeping

App Files Files Community

pm6six committed on Feb 9

Commit

f41cea2

verified ·

1 Parent(s): 23c1fce

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -20

app.py CHANGED Viewed

@@ -2,36 +2,43 @@ import streamlit as st
 import pdfplumber
 import pandas as pd
-# Function to process PDF and classify transactions
 def process_pdf(file):
     if file is None:
         st.error("No file uploaded.")
         return None
-    # Extract text from the uploaded PDF
     with pdfplumber.open(file) as pdf:
         text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
-    # Extract transactions (Modify based on statement format)
     lines = text.split("\n")
     transactions = [line for line in lines if any(char.isdigit() for char in line)]
     # Convert to DataFrame
     df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
-    # Classification function (Modify as needed)
-    def classify_transaction(description):
-        categories = {
-            "Grocery": ["Walmart", "Kroger", "Whole Foods"],
-            "Dining": ["McDonald's", "Starbucks", "Chipotle"],
-            "Bills": ["Verizon", "AT&T", "Con Edison"],
-            "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
-            "Transport": ["Uber", "Lyft", "MetroCard"],
-        }
-        for category, keywords in categories.items():
-            if any(keyword in description for keyword in keywords):
-                return category
-        return "Other"
     # Apply classification
     df["Category"] = df["Description"].apply(classify_transaction)
@@ -40,17 +47,16 @@ def process_pdf(file):
 # Streamlit UI
 st.title("📄 Credit Card Statement Classifier")
-st.write("Upload a PDF bank/credit card statement to categorize transactions.")
 uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
 if uploaded_file is not None:
-    st.success("File uploaded successfully!")
     # Process and display transactions
     df_result = process_pdf(uploaded_file)
     if df_result is not None:
-        st.write("### Classified Transactions:")
         st.dataframe(df_result)  # Display table

 import pdfplumber
 import pandas as pd
+# Function to classify transactions based on description
+def classify_transaction(description):
+    if not isinstance(description, str):  # Ensure description is a string
+        return "Unknown"
+    categories = {
+        "Grocery": ["Walmart", "Kroger", "Whole Foods"],
+        "Dining": ["McDonald's", "Starbucks", "Chipotle"],
+        "Bills": ["Verizon", "AT&T", "Con Edison"],
+        "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
+        "Transport": ["Uber", "Lyft", "MetroCard"],
+    }
+    for category, keywords in categories.items():
+        if any(keyword in description for keyword in keywords):
+            return category
+    return "Other"
+# Function to process the uploaded PDF and classify transactions
 def process_pdf(file):
     if file is None:
         st.error("No file uploaded.")
         return None
+    # Extract text from PDF
     with pdfplumber.open(file) as pdf:
         text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
+    # Extract transactions (Modify based on your statement format)
     lines = text.split("\n")
     transactions = [line for line in lines if any(char.isdigit() for char in line)]
     # Convert to DataFrame
     df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
+    # Ensure no missing descriptions
+    df["Description"] = df["Description"].fillna("Unknown")
     # Apply classification
     df["Category"] = df["Description"].apply(classify_transaction)
 # Streamlit UI
 st.title("📄 Credit Card Statement Classifier")
+st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")
 uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
 if uploaded_file is not None:
+    st.success("✅ File uploaded successfully!")
     # Process and display transactions
     df_result = process_pdf(uploaded_file)
     if df_result is not None:
+        st.write("### 📊 Classified Transactions:")
         st.dataframe(df_result)  # Display table