File size: 6,684 Bytes
2f69907 9620396 2f69907 9620396 2f69907 9620396 2f69907 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import torch
from transformers import DistilBertTokenizer, DistilBertModel
import spacy
from typing import Dict, List, Tuple
import sqlite3
from datetime import datetime
import uuid
class AccountingNLP:
def __init__(self):
# Use DistilBERT for lower memory usage
self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
self.model = DistilBertModel.from_pretrained('distilbert-base-uncased')
# Load spaCy model
self.nlp = spacy.load("en_core_web_sm")
self.accounting_rules = self.load_accounting_rules()
# Use /app for SQLite to ensure write permissions
self.connection = sqlite3.connect('/app/accounting_db.sqlite')
self.create_tables()
def load_accounting_rules(self) -> Dict:
"""Load GAAP/IFRS rules and chart of accounts"""
return {
'accounts': {
'cash': {'type': 'Asset', 'normal_balance': 'Debit'},
'office_supplies': {'type': 'Asset', 'normal_balance': 'Debit'},
'accounts_payable': {'type': 'Liability', 'normal_balance': 'Credit'}
},
'rules': {
'purchase': {
'debit': ['office_supplies'],
'credit': ['cash', 'accounts_payable']
}
}
}
def create_tables(self):
"""Create database tables for general ledger and journal entries"""
cursor = self.connection.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS journal_entries (
entry_id TEXT PRIMARY KEY,
date TEXT,
account TEXT,
debit REAL,
credit REAL,
description TEXT
)
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS general_ledger (
account TEXT,
balance REAL,
last_updated TEXT
)
''')
self.connection.commit()
def process_input(self, text: str) -> Dict:
"""Process natural language input and extract intent and entities"""
doc = self.nlp(text)
entities = {
'amount': None,
'account': None,
'date': None,
'payment_method': None
}
# Extract entities
for ent in doc.ents:
if ent.label_ == "MONEY":
entities['amount'] = float(ent.text.replace('$', ''))
elif ent.label_ == "DATE":
entities['date'] = ent.text
elif ent.text.lower() in self.accounting_rules['accounts']:
entities['account'] = ent.text.lower()
# Simple intent classification
intent = 'record_transaction' if 'record' in text.lower() else 'query'
return {'intent': intent, 'entities': entities, 'raw_text': text}
def generate_journal_entry(self, processed_input: Dict) -> List[Dict]:
"""Generate double-entry journal entries"""
if processed_input['intent'] != 'record_transaction':
return []
entities = processed_input['entities']
entry_id = str(uuid.uuid4())
date = entities['date'] or datetime.now().strftime('%Y-%m-%d')
entries = []
if entities['account'] in self.accounting_rules['rules']['purchase']['debit']:
# Debit entry
entries.append({
'entry_id': entry_id,
'date': date,
'account': entities['account'],
'debit': entities['amount'],
'credit': 0.0,
'description': processed_input['raw_text']
})
# Credit entry (assuming cash payment for simplicity)
entries.append({
'entry_id': entry_id,
'date': date,
'account': 'cash',
'debit': 0.0,
'credit': entities['amount'],
'description': processed_input['raw_text']
})
return entries
def validate_transaction(self, entries: List[Dict]) -> Tuple[bool, str]:
"""Validate journal entries for double-entry compliance"""
total_debit = sum(entry['debit'] for entry in entries)
total_credit = sum(entry['credit'] for entry in entries)
if total_debit != total_credit:
return False, "Debits and credits must balance"
if not entries:
return False, "No valid entries generated"
return True, "Valid transaction"
def update_ledger(self, entries: List[Dict]):
"""Update general ledger with validated entries"""
cursor = self.connection.cursor()
for entry in entries:
cursor.execute('''
INSERT INTO journal_entries (entry_id, date, account, debit, credit, description)
VALUES (?, ?, ?, ?, ?, ?)
''', (
entry['entry_id'],
entry['date'],
entry['account'],
entry['debit'],
entry['credit'],
entry['description']
))
# Update general ledger balance
cursor.execute('''
INSERT OR REPLACE INTO general_ledger (account, balance, last_updated)
VALUES (?,
(SELECT COALESCE((SELECT balance FROM general_ledger WHERE account = ?), 0)
+ ? - ?),
?)
''', (
entry['account'],
entry['account'],
entry['debit'],
entry['credit'],
entry['date']
))
self.connection.commit()
def generate_response(self, processed_input: Dict, entries: List[Dict]) -> str:
"""Generate natural language response"""
if processed_input['intent'] == 'record_transaction':
is_valid, message = self.validate_transaction(entries)
if is_valid:
self.update_ledger(entries)
return f"Successfully recorded transaction: {processed_input['raw_text']}"
return f"Error: {message}"
return "Query processing not implemented"
def process(self, text: str) -> str:
"""Main processing pipeline"""
processed_input = self.process_input(text)
entries = self.generate_journal_entry(processed_input)
return self.generate_response(processed_input, entries)
# Example usage
if __name__ == "__main__":
accounting_ai = AccountingNLP()
result = accounting_ai.process("Record a $500 office supplies purchase paid by check")
print(result) |