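# Slot-filling inference with a fine-tuned multilingual BERT token-classification model.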
import torch
from transformers import BertTokenizerFast, BertForTokenClassification
import numpy as np
# Load the multilingual BERT tokenizer and the fine-tuned slot-filling checkpoint
tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")
model = BertForTokenClassification.from_pretrained("./results/checkpoint-100")
model.eval()  # make sure dropout is disabled for inference
slot_label_map = {
    0: "O", 1: "B-project_id", 2: "I-project_id", 3: "B-reason", 4: "I-reason",
    5: "B-amount", 6: "I-amount", 7: "B-project_name", 8: "I-project_name",
    9: "B-status", 10: "I-status", 11: "B-riyals", 12: "I-riyals"
}
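# Labels follow the BIO scheme: "B-" marks the first token of a slot value,
# "I-" marks a continuation token, and "O" marks tokens outside any slot.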
def decode_slots(tokens, predictions, slot_label_map):
    """Group BIO-tagged tokens into a {slot_name: slot_value} dictionary."""
    slots = {}
    current_slot = None
    current_value = []
    for token, pred_id in zip(tokens, predictions):
        label = slot_label_map[pred_id]
        # Handle B- and I- slots
        if label.startswith("B-"):  # Beginning of a new slot
            if current_slot:
                slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
            current_slot = label[2:]  # Extract slot name
            current_value = [token]  # Start a new slot
        elif label.startswith("I-") and current_slot == label[2:]:  # Continuation of the current slot
            current_value.append(token)
        else:  # No slot or "O"
            if current_slot:
                slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
            current_slot = None
            current_value = []
    # Flush the last open slot, if any
    if current_slot:
        slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
    return slots
def predict_intent_and_slots(text, model, tokenizer, slot_label_map):
    """Tokenize the utterance, run the token-classification model, and decode slot values."""
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # Same as during training
        return_tensors="pt"
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
    predictions = torch.argmax(logits, dim=2).squeeze().tolist()
    # Note: tokens include [CLS], [SEP], and [PAD]; those positions are expected to be tagged "O".
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
    predictions = predictions[:len(tokens)]
    slots = decode_slots(tokens, predictions, slot_label_map)
    intent = "mock_intent"  # Placeholder: intent classification is not implemented here
    return {"utterance": text, "intent": intent, "slots": slots}
def get_slots(text):
    result = predict_intent_and_slots(text, model, tokenizer, slot_label_map)
    slots = result['slots']
    return slots
# Test the model
test_text = "Hey, I need to request money for a project name Abha University and id is 123 and the amount is 500 riyals"
result = predict_intent_and_slots(test_text, model, tokenizer, slot_label_map)
print("Prediction Result:")
print(result)
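
# get_slots() is a thin wrapper that returns only the slot dictionary, e.g.:
# slots = get_slots(test_text)  # -> {"project_name": ..., "project_id": ..., "amount": ...} (values depend on the trained checkpoint)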