|
import gradio as gr |
|
import pdfplumber |
|
import pandas as pd |
|
|
|
|
|
# Merchant keywords used to bucket a transaction description into a category.
# Order matters: the first category with a matching keyword wins.
_CATEGORY_KEYWORDS = {
    "Grocery": ["Walmart", "Kroger", "Whole Foods"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle"],
    "Bills": ["Verizon", "AT&T", "Con Edison"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
    "Transport": ["Uber", "Lyft", "MetroCard"],
}


def _classify_transaction(description):
    """Return the first category whose keyword occurs in *description*, else "Other"."""
    if not description:
        return "Other"
    # Compare case-insensitively so "WALMART" and "Walmart" classify alike.
    haystack = description.casefold()
    for category, keywords in _CATEGORY_KEYWORDS.items():
        if any(keyword.casefold() in haystack for keyword in keywords):
            return category
    return "Other"


def _parse_transaction_line(line):
    """Split a line into [date, description, amount]; return None if it has < 3 tokens."""
    tokens = line.split()
    if len(tokens) < 3:
        return None
    # Assumes the first token is the date and the last token is the amount;
    # everything in between is kept as the (possibly multi-word) description.
    return [tokens[0], " ".join(tokens[1:-1]), tokens[-1]]


def process_pdf(file):
    """Extract transaction-like lines from a PDF and tag each with a category.

    Args:
        file: The uploaded file. Either a path string or an object whose
            ``name`` attribute is the path on disk. ``None`` means nothing
            was uploaded.

    Returns:
        The string ``"No file uploaded."`` when *file* is ``None``; otherwise
        a ``pandas.DataFrame`` with the columns ``Date``, ``Description``,
        ``Amount`` and ``Category``. All cell values are strings taken from
        the PDF text; no numeric or date conversion is performed.
    """
    if file is None:
        # NOTE(review): the UI declares a dataframe output, so a plain string
        # here may not render -- confirm against the Gradio version in use.
        return "No file uploaded."

    # Accept both a plain path and a file wrapper exposing ``.name``.
    path = file if isinstance(file, str) else file.name

    with pdfplumber.open(path) as pdf:
        # Extract each page once; pages without text yield a falsy value.
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    rows = []
    for line in text.split("\n"):
        # Only lines containing at least one digit are treated as candidates.
        if not any(char.isdigit() for char in line):
            continue
        row = _parse_transaction_line(line)
        if row is not None:
            rows.append(row)

    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])
    df["Category"] = df["Description"].apply(_classify_transaction)
    return df
|
|
|
|
|
# Build the web UI: one file upload in, one dataframe out, handled by process_pdf.
app = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="file"),
    outputs="dataframe",
    title="Bank Statement Classifier",
)

# Start serving the interface as soon as this module is executed.
app.launch()
|
|