import torch from transformers import DistilBertTokenizer, DistilBertModel import spacy from typing import Dict, List, Tuple import sqlite3 from datetime import datetime import uuid class AccountingNLP: def __init__(self): # Use DistilBERT for lower memory usage self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') self.model = DistilBertModel.from_pretrained('distilbert-base-uncased') # Load spaCy model self.nlp = spacy.load("en_core_web_sm") self.accounting_rules = self.load_accounting_rules() # Use /app for SQLite to ensure write permissions self.connection = sqlite3.connect('/app/accounting_db.sqlite') self.create_tables() def load_accounting_rules(self) -> Dict: """Load GAAP/IFRS rules and chart of accounts""" return { 'accounts': { 'cash': {'type': 'Asset', 'normal_balance': 'Debit'}, 'office_supplies': {'type': 'Asset', 'normal_balance': 'Debit'}, 'accounts_payable': {'type': 'Liability', 'normal_balance': 'Credit'} }, 'rules': { 'purchase': { 'debit': ['office_supplies'], 'credit': ['cash', 'accounts_payable'] } } } def create_tables(self): """Create database tables for general ledger and journal entries""" cursor = self.connection.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS journal_entries ( entry_id TEXT PRIMARY KEY, date TEXT, account TEXT, debit REAL, credit REAL, description TEXT ) ''') cursor.execute(''' CREATE TABLE IF NOT EXISTS general_ledger ( account TEXT, balance REAL, last_updated TEXT ) ''') self.connection.commit() def process_input(self, text: str) -> Dict: """Process natural language input and extract intent and entities""" doc = self.nlp(text) entities = { 'amount': None, 'account': None, 'date': None, 'payment_method': None } # Extract entities for ent in doc.ents: if ent.label_ == "MONEY": entities['amount'] = float(ent.text.replace('$', '')) elif ent.label_ == "DATE": entities['date'] = ent.text elif ent.text.lower() in self.accounting_rules['accounts']: entities['account'] = ent.text.lower() # Simple intent classification intent = 'record_transaction' if 'record' in text.lower() else 'query' return {'intent': intent, 'entities': entities, 'raw_text': text} def generate_journal_entry(self, processed_input: Dict) -> List[Dict]: """Generate double-entry journal entries""" if processed_input['intent'] != 'record_transaction': return [] entities = processed_input['entities'] entry_id = str(uuid.uuid4()) date = entities['date'] or datetime.now().strftime('%Y-%m-%d') entries = [] if entities['account'] in self.accounting_rules['rules']['purchase']['debit']: # Debit entry entries.append({ 'entry_id': entry_id, 'date': date, 'account': entities['account'], 'debit': entities['amount'], 'credit': 0.0, 'description': processed_input['raw_text'] }) # Credit entry (assuming cash payment for simplicity) entries.append({ 'entry_id': entry_id, 'date': date, 'account': 'cash', 'debit': 0.0, 'credit': entities['amount'], 'description': processed_input['raw_text'] }) return entries def validate_transaction(self, entries: List[Dict]) -> Tuple[bool, str]: """Validate journal entries for double-entry compliance""" total_debit = sum(entry['debit'] for entry in entries) total_credit = sum(entry['credit'] for entry in entries) if total_debit != total_credit: return False, "Debits and credits must balance" if not entries: return False, "No valid entries generated" return True, "Valid transaction" def update_ledger(self, entries: List[Dict]): """Update general ledger with validated entries""" cursor = self.connection.cursor() for entry in entries: cursor.execute(''' INSERT INTO journal_entries (entry_id, date, account, debit, credit, description) VALUES (?, ?, ?, ?, ?, ?) ''', ( entry['entry_id'], entry['date'], entry['account'], entry['debit'], entry['credit'], entry['description'] )) # Update general ledger balance cursor.execute(''' INSERT OR REPLACE INTO general_ledger (account, balance, last_updated) VALUES (?, (SELECT COALESCE((SELECT balance FROM general_ledger WHERE account = ?), 0) + ? - ?), ?) ''', ( entry['account'], entry['account'], entry['debit'], entry['credit'], entry['date'] )) self.connection.commit() def generate_response(self, processed_input: Dict, entries: List[Dict]) -> str: """Generate natural language response""" if processed_input['intent'] == 'record_transaction': is_valid, message = self.validate_transaction(entries) if is_valid: self.update_ledger(entries) return f"Successfully recorded transaction: {processed_input['raw_text']}" return f"Error: {message}" return "Query processing not implemented" def process(self, text: str) -> str: """Main processing pipeline""" processed_input = self.process_input(text) entries = self.generate_journal_entry(processed_input) return self.generate_response(processed_input, entries) # Example usage if __name__ == "__main__": accounting_ai = AccountingNLP() result = accounting_ai.process("Record a $500 office supplies purchase paid by check") print(result)