# testing / app.py
# pm6six's picture
# Update app.py
# de0992d verified
# raw
# history blame
# 1.58 kB
import gradio as gr
import pdfplumber
import pandas as pd
# Function to process PDF and classify transactions
def process_pdf(file):
if file is None:
return "No file uploaded."
# Extract text from the uploaded PDF
with pdfplumber.open(file.name) as pdf:
text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
# Extract transactions (Modify based on statement format)
lines = text.split("\n")
transactions = [line for line in lines if any(char.isdigit() for char in line)]
# Convert to DataFrame
df = pd.DataFrame([line.split()[:3] for line in transactions], columns=["Date", "Description", "Amount"])
# Classification function (Modify as needed)
def classify_transaction(description):
categories = {
"Grocery": ["Walmart", "Kroger", "Whole Foods"],
"Dining": ["McDonald's", "Starbucks", "Chipotle"],
"Bills": ["Verizon", "AT&T", "Con Edison"],
"Entertainment": ["Netflix", "Spotify", "Amazon Prime"],
"Transport": ["Uber", "Lyft", "MetroCard"],
}
for category, keywords in categories.items():
if any(keyword in description for keyword in keywords):
return category
return "Other"
# Apply classification
df["Category"] = df["Description"].apply(classify_transaction)
return df # Display the table
# Gradio Interface
# NOTE: gr.File is created without an explicit `type=`. The value "file" is
# rejected by Gradio 4+ (which accepts "filepath" / "binary"), so the
# component's version-specific default is used instead; process_pdf only
# needs the uploaded file's path.
app = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(),
    outputs="dataframe",
    title="Bank Statement Classifier",
)
app.launch()