Bhaskar2611 committed on
Commit
15c9ede
·
verified ·
1 Parent(s): 471f1d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -12
app.py CHANGED
@@ -38,20 +38,14 @@ def extract_text_from_pdf(pdf_path, is_scanned=False):
38
  def parse_bank_statement(text):
39
  """Parse bank statement using LLM with fallback to rule-based parser"""
40
  cleaned_text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
41
- safe_text = (
42
- cleaned_text
43
- .replace("{", "{{")
44
- .replace("}", "}}")
45
- .replace("%", "%%")
46
- )
47
  print(f"Original text sample: {cleaned_text[:200]}...")
48
- print(f"Safe text sample: {safe_text[:200]}...")
49
 
50
- # Craft precise prompt for LLM
51
  prompt = f"""
52
  You are a financial data parser. Extract transactions from bank statements.
53
 
54
  Given this bank statement text:
 
55
 
56
  Extract all transactions with these fields:
57
  - Date
@@ -65,10 +59,28 @@ Extract all transactions with these fields:
65
  Return JSON with "transactions" array containing these fields.
66
 
67
  Example format:
68
- {"transactions": [
69
- {"date": "2025-05-08", "description": "Company XYZ Payroll", "amount": "8315.40", "debit": "0.00", "credit": "8315.40", "closing_balance": "38315.40", "category": "Salary"},
70
- ...
71
- ]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  Rules:
74
  1. Ensure numeric fields have valid numbers (e.g., "0.00" instead of "-")
 
38
  def parse_bank_statement(text):
39
  """Parse bank statement using LLM with fallback to rule-based parser"""
40
  cleaned_text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
 
 
 
 
 
 
41
  print(f"Original text sample: {cleaned_text[:200]}...")
 
42
 
43
+ # Craft precise prompt for LLM with proper JSON escaping
44
  prompt = f"""
45
  You are a financial data parser. Extract transactions from bank statements.
46
 
47
  Given this bank statement text:
48
+ {cleaned_text}
49
 
50
  Extract all transactions with these fields:
51
  - Date
 
59
  Return JSON with "transactions" array containing these fields.
60
 
61
  Example format:
62
+ {{
63
+ "transactions": [
64
+ {{
65
+ "date": "2025-05-08",
66
+ "description": "Company XYZ Payroll",
67
+ "amount": "8315.40",
68
+ "debit": "0.00",
69
+ "credit": "8315.40",
70
+ "closing_balance": "38315.40",
71
+ "category": "Salary"
72
+ }},
73
+ {{
74
+ "date": "2025-05-19",
75
+ "description": "Whole Foods",
76
+ "amount": "142.21",
77
+ "debit": "142.21",
78
+ "credit": "0.00",
79
+ "closing_balance": "38173.19",
80
+ "category": "Groceries"
81
+ }}
82
+ ]
83
+ }}
84
 
85
  Rules:
86
  1. Ensure numeric fields have valid numbers (e.g., "0.00" instead of "-")