pm6six committed on
Commit
f41cea2
·
verified ·
1 Parent(s): 23c1fce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -2,36 +2,43 @@ import streamlit as st
2
  import pdfplumber
3
  import pandas as pd
4
 
5
- # Function to process PDF and classify transactions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def process_pdf(file):
7
  if file is None:
8
  st.error("No file uploaded.")
9
  return None
10
 
11
- # Extract text from the uploaded PDF
12
  with pdfplumber.open(file) as pdf:
13
  text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
14
 
15
- # Extract transactions (Modify based on statement format)
16
  lines = text.split("\n")
17
  transactions = [line for line in lines if any(char.isdigit() for char in line)]
18
 
19
  # Convert to DataFrame
20
  df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
21
 
22
- # Classification function (Modify as needed)
23
- def classify_transaction(description):
24
- categories = {
25
- "Grocery": ["Walmart", "Kroger", "Whole Foods"],
26
- "Dining": ["McDonald's", "Starbucks", "Chipotle"],
27
- "Bills": ["Verizon", "AT&T", "Con Edison"],
28
- "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
29
- "Transport": ["Uber", "Lyft", "MetroCard"],
30
- }
31
- for category, keywords in categories.items():
32
- if any(keyword in description for keyword in keywords):
33
- return category
34
- return "Other"
35
 
36
  # Apply classification
37
  df["Category"] = df["Description"].apply(classify_transaction)
@@ -40,17 +47,16 @@ def process_pdf(file):
40
 
41
  # Streamlit UI
42
  st.title("📄 Credit Card Statement Classifier")
43
- st.write("Upload a PDF bank/credit card statement to categorize transactions.")
44
 
45
  uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
46
 
47
  if uploaded_file is not None:
48
- st.success("File uploaded successfully!")
49
 
50
  # Process and display transactions
51
  df_result = process_pdf(uploaded_file)
52
 
53
  if df_result is not None:
54
- st.write("### Classified Transactions:")
55
  st.dataframe(df_result) # Display table
56
-
 
2
  import pdfplumber
3
  import pandas as pd
4
 
5
+ # Function to classify transactions based on description
6
+ def classify_transaction(description):
7
+ if not isinstance(description, str): # Ensure description is a string
8
+ return "Unknown"
9
+
10
+ categories = {
11
+ "Grocery": ["Walmart", "Kroger", "Whole Foods"],
12
+ "Dining": ["McDonald's", "Starbucks", "Chipotle"],
13
+ "Bills": ["Verizon", "AT&T", "Con Edison"],
14
+ "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
15
+ "Transport": ["Uber", "Lyft", "MetroCard"],
16
+ }
17
+
18
+ for category, keywords in categories.items():
19
+ if any(keyword in description for keyword in keywords):
20
+ return category
21
+ return "Other"
22
+
23
+ # Function to process the uploaded PDF and classify transactions
24
  def process_pdf(file):
25
  if file is None:
26
  st.error("No file uploaded.")
27
  return None
28
 
29
+ # Extract text from PDF
30
  with pdfplumber.open(file) as pdf:
31
  text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
32
 
33
+ # Extract transactions (Modify based on your statement format)
34
  lines = text.split("\n")
35
  transactions = [line for line in lines if any(char.isdigit() for char in line)]
36
 
37
  # Convert to DataFrame
38
  df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
39
 
40
+ # Ensure no missing descriptions
41
+ df["Description"] = df["Description"].fillna("Unknown")
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Apply classification
44
  df["Category"] = df["Description"].apply(classify_transaction)
 
47
 
48
  # Streamlit UI
49
  st.title("📄 Credit Card Statement Classifier")
50
+ st.write("Upload a **PDF bank/credit card statement** to categorize transactions automatically.")
51
 
52
  uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
53
 
54
  if uploaded_file is not None:
55
+ st.success("✅ File uploaded successfully!")
56
 
57
  # Process and display transactions
58
  df_result = process_pdf(uploaded_file)
59
 
60
  if df_result is not None:
61
+ st.write("### 📊 Classified Transactions:")
62
  st.dataframe(df_result) # Display table