|
import torch |
|
from transformers import DistilBertTokenizer, DistilBertModel |
|
import spacy |
|
from typing import Dict, List, Tuple |
|
import sqlite3 |
|
from datetime import datetime |
|
import uuid |
|
|
|
class AccountingNLP: |
|
def __init__(self): |
|
|
|
self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') |
|
self.model = DistilBertModel.from_pretrained('distilbert-base-uncased') |
|
|
|
self.nlp = spacy.load("en_core_web_sm") |
|
self.accounting_rules = self.load_accounting_rules() |
|
|
|
self.connection = sqlite3.connect('/app/accounting_db.sqlite') |
|
self.create_tables() |
|
|
|
def load_accounting_rules(self) -> Dict: |
|
"""Load GAAP/IFRS rules and chart of accounts""" |
|
return { |
|
'accounts': { |
|
'cash': {'type': 'Asset', 'normal_balance': 'Debit'}, |
|
'office_supplies': {'type': 'Asset', 'normal_balance': 'Debit'}, |
|
'accounts_payable': {'type': 'Liability', 'normal_balance': 'Credit'} |
|
}, |
|
'rules': { |
|
'purchase': { |
|
'debit': ['office_supplies'], |
|
'credit': ['cash', 'accounts_payable'] |
|
} |
|
} |
|
} |
|
|
|
def create_tables(self): |
|
"""Create database tables for general ledger and journal entries""" |
|
cursor = self.connection.cursor() |
|
cursor.execute(''' |
|
CREATE TABLE IF NOT EXISTS journal_entries ( |
|
entry_id TEXT PRIMARY KEY, |
|
date TEXT, |
|
account TEXT, |
|
debit REAL, |
|
credit REAL, |
|
description TEXT |
|
) |
|
''') |
|
cursor.execute(''' |
|
CREATE TABLE IF NOT EXISTS general_ledger ( |
|
account TEXT, |
|
balance REAL, |
|
last_updated TEXT |
|
) |
|
''') |
|
self.connection.commit() |
|
|
|
def process_input(self, text: str) -> Dict: |
|
"""Process natural language input and extract intent and entities""" |
|
doc = self.nlp(text) |
|
entities = { |
|
'amount': None, |
|
'account': None, |
|
'date': None, |
|
'payment_method': None |
|
} |
|
|
|
|
|
for ent in doc.ents: |
|
if ent.label_ == "MONEY": |
|
entities['amount'] = float(ent.text.replace('$', '')) |
|
elif ent.label_ == "DATE": |
|
entities['date'] = ent.text |
|
elif ent.text.lower() in self.accounting_rules['accounts']: |
|
entities['account'] = ent.text.lower() |
|
|
|
|
|
intent = 'record_transaction' if 'record' in text.lower() else 'query' |
|
|
|
return {'intent': intent, 'entities': entities, 'raw_text': text} |
|
|
|
def generate_journal_entry(self, processed_input: Dict) -> List[Dict]: |
|
"""Generate double-entry journal entries""" |
|
if processed_input['intent'] != 'record_transaction': |
|
return [] |
|
|
|
entities = processed_input['entities'] |
|
entry_id = str(uuid.uuid4()) |
|
date = entities['date'] or datetime.now().strftime('%Y-%m-%d') |
|
entries = [] |
|
|
|
if entities['account'] in self.accounting_rules['rules']['purchase']['debit']: |
|
|
|
entries.append({ |
|
'entry_id': entry_id, |
|
'date': date, |
|
'account': entities['account'], |
|
'debit': entities['amount'], |
|
'credit': 0.0, |
|
'description': processed_input['raw_text'] |
|
}) |
|
|
|
entries.append({ |
|
'entry_id': entry_id, |
|
'date': date, |
|
'account': 'cash', |
|
'debit': 0.0, |
|
'credit': entities['amount'], |
|
'description': processed_input['raw_text'] |
|
}) |
|
|
|
return entries |
|
|
|
def validate_transaction(self, entries: List[Dict]) -> Tuple[bool, str]: |
|
"""Validate journal entries for double-entry compliance""" |
|
total_debit = sum(entry['debit'] for entry in entries) |
|
total_credit = sum(entry['credit'] for entry in entries) |
|
|
|
if total_debit != total_credit: |
|
return False, "Debits and credits must balance" |
|
if not entries: |
|
return False, "No valid entries generated" |
|
return True, "Valid transaction" |
|
|
|
def update_ledger(self, entries: List[Dict]): |
|
"""Update general ledger with validated entries""" |
|
cursor = self.connection.cursor() |
|
for entry in entries: |
|
cursor.execute(''' |
|
INSERT INTO journal_entries (entry_id, date, account, debit, credit, description) |
|
VALUES (?, ?, ?, ?, ?, ?) |
|
''', ( |
|
entry['entry_id'], |
|
entry['date'], |
|
entry['account'], |
|
entry['debit'], |
|
entry['credit'], |
|
entry['description'] |
|
)) |
|
|
|
|
|
cursor.execute(''' |
|
INSERT OR REPLACE INTO general_ledger (account, balance, last_updated) |
|
VALUES (?, |
|
(SELECT COALESCE((SELECT balance FROM general_ledger WHERE account = ?), 0) |
|
+ ? - ?), |
|
?) |
|
''', ( |
|
entry['account'], |
|
entry['account'], |
|
entry['debit'], |
|
entry['credit'], |
|
entry['date'] |
|
)) |
|
|
|
self.connection.commit() |
|
|
|
def generate_response(self, processed_input: Dict, entries: List[Dict]) -> str: |
|
"""Generate natural language response""" |
|
if processed_input['intent'] == 'record_transaction': |
|
is_valid, message = self.validate_transaction(entries) |
|
if is_valid: |
|
self.update_ledger(entries) |
|
return f"Successfully recorded transaction: {processed_input['raw_text']}" |
|
return f"Error: {message}" |
|
return "Query processing not implemented" |
|
|
|
def process(self, text: str) -> str: |
|
"""Main processing pipeline""" |
|
processed_input = self.process_input(text) |
|
entries = self.generate_journal_entry(processed_input) |
|
return self.generate_response(processed_input, entries) |
|
|
|
|
|
if __name__ == "__main__": |
|
accounting_ai = AccountingNLP() |
|
result = accounting_ai.process("Record a $500 office supplies purchase paid by check") |
|
print(result) |