Bhaskar2611 committed on
Commit
4cfc47d
·
verified ·
1 Parent(s): d05b1b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -5
app.py CHANGED
@@ -81,10 +81,10 @@ def rule_based_parser(text):
81
  """Fallback parser for structured tables with pipe delimiters"""
82
  lines = [line.strip() for line in text.split('\n') if line.strip()]
83
 
84
- # Find header line containing 'Date'
85
  header_index = None
86
  for i, line in enumerate(lines):
87
- if re.search(r'\bDate\b', line):
88
  header_index = i
89
  break
90
 
@@ -120,7 +120,10 @@ def rule_based_parser(text):
120
  def process_file(file, is_scanned):
121
  """Main processing function"""
122
  if not file:
123
- return "No file uploaded"
 
 
 
124
 
125
  file_path = file.name
126
  file_ext = os.path.splitext(file_path)[1].lower()
@@ -131,13 +134,32 @@ def process_file(file, is_scanned):
131
  elif file_ext == '.pdf':
132
  text = extract_text_from_pdf(file_path, is_scanned=is_scanned)
133
  else:
134
- return {"error": "Unsupported file format"}
 
 
 
135
 
136
  parsed_data = parse_bank_statement(text)
137
  df = pd.DataFrame(parsed_data["transactions"])
 
 
 
 
 
 
 
 
 
 
138
  return df
 
139
  except Exception as e:
140
- return f"Error: {str(e)}"
 
 
 
 
 
141
 
142
  # Gradio Interface
143
  interface = gr.Interface(
 
81
  """Fallback parser for structured tables with pipe delimiters"""
82
  lines = [line.strip() for line in text.split('\n') if line.strip()]
83
 
84
+ # Find header line containing '|Date' (a pipe immediately followed by "Date")
85
  header_index = None
86
  for i, line in enumerate(lines):
87
+ if re.search(r'\|Date', line): # Improved pattern to match "|Date"
88
  header_index = i
89
  break
90
 
 
120
  def process_file(file, is_scanned):
121
  """Main processing function"""
122
  if not file:
123
+ return pd.DataFrame(columns=[
124
+ "Date", "Description", "Amount", "Debit",
125
+ "Credit", "Closing Balance", "Category"
126
+ ])
127
 
128
  file_path = file.name
129
  file_ext = os.path.splitext(file_path)[1].lower()
 
134
  elif file_ext == '.pdf':
135
  text = extract_text_from_pdf(file_path, is_scanned=is_scanned)
136
  else:
137
+ return pd.DataFrame(columns=[
138
+ "Date", "Description", "Amount", "Debit",
139
+ "Credit", "Closing Balance", "Category"
140
+ ])
141
 
142
  parsed_data = parse_bank_statement(text)
143
  df = pd.DataFrame(parsed_data["transactions"])
144
+
145
+ # Ensure all required columns exist
146
+ required_cols = ["date", "description", "amount", "debit",
147
+ "credit", "closing_balance", "category"]
148
+ for col in required_cols:
149
+ if col not in df.columns:
150
+ df[col] = ""
151
+
152
+ df.columns = ["Date", "Description", "Amount", "Debit",
153
+ "Credit", "Closing Balance", "Category"]
154
  return df
155
+
156
  except Exception as e:
157
+ print(f"Processing error: {str(e)}")
158
+ # Return empty DataFrame with correct columns on error
159
+ return pd.DataFrame(columns=[
160
+ "Date", "Description", "Amount", "Debit",
161
+ "Credit", "Closing Balance", "Category"
162
+ ])
163
 
164
  # Gradio Interface
165
  interface = gr.Interface(