Spaces:

Shariar00
/

Voice-Enabled-ERP-Assistant

Build error

File size: 2,862 Bytes

d483661



import torch
from transformers import BertTokenizerFast,BertForTokenClassification
import numpy as np


# Fast WordPiece tokenizer for the "bert-base-multilingual-cased" vocabulary.
# Loaded once at module import and shared by every function below.
# NOTE(review): presumably the same tokenizer the checkpoint was trained with — confirm.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")
# Token-classification model restored from a checkpoint directory; the path is
# relative, so it is resolved against the current working directory.
model = BertForTokenClassification.from_pretrained("./results/checkpoint-100")


# Maps a predicted class index to its BIO slot tag.
# Index 0 is the "outside" tag; every slot type then takes two consecutive
# indices: B-<slot> (first token of a value) and I-<slot> (continuation).
slot_label_map = {
    0: "O",
    1: "B-project_id",
    2: "I-project_id",
    3: "B-reason",
    4: "I-reason",
    5: "B-amount",
    6: "I-amount",
    7: "B-project_name",
    8: "I-project_name",
    9: "B-status",
    10: "I-status",
    11: "B-riyals",
    12: "I-riyals",
}



def decode_slots(tokens, predictions, slot_label_map):
    """Group BIO-tagged tokens into a ``{slot_name: text}`` dictionary.

    Args:
        tokens: Token strings, aligned one-to-one with ``predictions``.
        predictions: Predicted label ids, one per token.
        slot_label_map: Mapping from label id to a tag such as ``"O"``,
            ``"B-amount"`` or ``"I-amount"``.

    Returns:
        A dict mapping each slot name to the text of its tokens, joined with
        the module-level ``tokenizer.convert_tokens_to_string``. When the same
        slot name is decoded more than once, the last occurrence wins.

    Raises:
        KeyError: If a predicted id is missing from ``slot_label_map``.
    """
    finished = []       # (slot_name, token_pieces) in the order spans closed
    open_name = None    # slot currently being collected, if any
    open_pieces = []

    for piece, label_id in zip(tokens, predictions):
        tag = slot_label_map[label_id]
        prefix, slot_name = tag[:2], tag[2:]

        # An I- tag extends the open span only when it names the same slot.
        if prefix == "I-" and slot_name == open_name:
            open_pieces.append(piece)
            continue

        # Every other tag (B-, O, or a mismatched I-) ends the open span.
        if open_name:
            finished.append((open_name, open_pieces))

        if prefix == "B-":
            open_name, open_pieces = slot_name, [piece]
        elif open_name:
            open_name, open_pieces = None, []

    # A span that runs to the end of the sequence is still open here.
    if open_name:
        finished.append((open_name, open_pieces))

    # Render the text once per span; a repeated name overwrites its earlier value.
    return {
        name: tokenizer.convert_tokens_to_string(pieces)
        for name, pieces in finished
    }


def predict_intent_and_slots(text, model, tokenizer, slot_label_map):
    """Tag ``text`` with the token-classification model and decode its slots.

    Despite the name, no intent is predicted; only slots are returned.

    Args:
        text: A single utterance to analyse.
        model: Token-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer used to encode ``text`` and to map ids back to
            token strings.
        slot_label_map: Mapping from predicted label id to BIO tag.

    Returns:
        ``{"utterance": text, "slots": {...}}`` where ``slots`` is the result
        of ``decode_slots`` on the real (non-special, non-padded) tokens.
    """
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # Same as during training
        return_special_tokens_mask=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]

    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    # A single utterance is encoded, so work on batch row 0 explicitly.
    label_ids = torch.argmax(logits, dim=2)[0].tolist()
    token_ids = input_ids[0].tolist()
    special_flags = encoding["special_tokens_mask"][0].tolist()
    attended_flags = attention_mask[0].tolist()

    # The sequence is padded to max_length, so most positions are [PAD], plus
    # [CLS]/[SEP]. Predictions there carry no slot information and would
    # otherwise leak into slot values or create bogus slots, so drop them.
    kept = [
        position
        for position, (is_special, is_attended) in enumerate(
            zip(special_flags, attended_flags)
        )
        if is_attended and not is_special
    ]
    tokens = tokenizer.convert_ids_to_tokens([token_ids[i] for i in kept])
    predictions = [label_ids[i] for i in kept]

    slots = decode_slots(tokens, predictions, slot_label_map)

    return {"utterance": text, "slots": slots}

def get_slots(text):
    """Return only the decoded slot dictionary for ``text``.

    Runs ``predict_intent_and_slots`` with the module-level model, tokenizer
    and label map, then picks the ``"slots"`` entry of its result.
    """
    prediction = predict_intent_and_slots(text, model, tokenizer, slot_label_map)
    return prediction["slots"]

# Smoke test: run one sample utterance through the pipeline and print the
# resulting {"utterance": ..., "slots": ...} dictionary.
# NOTE(review): these statements are at module top level, so they also run
# (inference plus printing) whenever this module is imported, e.g. by code
# that only wants get_slots — consider an `if __name__ == "__main__":` guard.
test_text = "Hey, I need to request money for a project name Abha University and id is 123 and the amount is 500 riyals"
result = predict_intent_and_slots(test_text, model, tokenizer, slot_label_map)

print("Prediction Result:")
print(result)