Spaces: Build error
import torch
import numpy as np
from transformers import BertTokenizerFast, BertForTokenClassification

tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")
model = BertForTokenClassification.from_pretrained("./results/checkpoint-100")  # local fine-tuned checkpoint
slot_label_map = {
    0: "O", 1: "B-project_id", 2: "I-project_id", 3: "B-reason", 4: "I-reason",
    5: "B-amount", 6: "I-amount", 7: "B-project_name", 8: "I-project_name",
    9: "B-status", 10: "I-status", 11: "B-riyals", 12: "I-riyals",
}
def decode_slots(tokens, predictions, slot_label_map):
    """Group BIO-tagged tokens into a {slot_name: text} dict."""
    slots = {}
    current_slot = None
    current_value = []
    for token, pred_id in zip(tokens, predictions):
        label = slot_label_map[pred_id]
        if label.startswith("B-"):  # beginning of a new slot
            if current_slot:
                slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
            current_slot = label[2:]  # slot name without the BIO prefix
            current_value = [token]   # start collecting the new slot's tokens
        elif label.startswith("I-") and current_slot == label[2:]:  # continuation of the open slot
            current_value.append(token)
        else:  # "O", or an I- tag that does not match the open slot: close it
            if current_slot:
                slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
            current_slot = None
            current_value = []
    if current_slot:  # flush a slot that runs to the end of the sequence
        slots[current_slot] = tokenizer.convert_tokens_to_string(current_value)
    return slots
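
# Quick sanity check of the BIO decoding above. Illustrative only: the WordPiece
# splits and label ids below are hand-picked assumptions, not real model output.
# Expected: {'project_name': 'Abha University', 'amount': '500'}
_demo_tokens = ["Ab", "##ha", "University", "500"]
_demo_preds = [7, 8, 8, 5]  # B-project_name, I-project_name, I-project_name, B-amount
print(decode_slots(_demo_tokens, _demo_preds, slot_label_map))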
def predict_intent_and_slots(text, model, tokenizer, slot_label_map):
    """Run token classification on `text` and return the decoded slots."""
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # same as during training
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=2).squeeze().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
    # Keep only the real (non-padding) positions so [PAD] tokens never reach the decoder
    seq_len = int(attention_mask.sum())
    tokens = tokens[:seq_len]
    predictions = predictions[:seq_len]
    slots = decode_slots(tokens, predictions, slot_label_map)
    intent = "mock_intent"  # placeholder: intent classification is not implemented yet
    return {"utterance": text, "slots": slots}
def get_slots(text):
    result = predict_intent_and_slots(text, model, tokenizer, slot_label_map)
    return result["slots"]
# Test the model
test_text = "Hey, I need to request money for a project name Abha University and id is 123 and the amount is 500 riyals"
result = predict_intent_and_slots(test_text, model, tokenizer, slot_label_map)
print("Prediction Result:")
print(result)
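
The script defines get_slots but never exposes it, so the Space presumably wraps it in an app entry point. Below is a minimal sketch of such a wrapper, assuming the Space uses the Gradio SDK (gradio, torch, and transformers would then need to be listed in requirements.txt); the interface name and layout are illustrative and not taken from the original repo:

import gradio as gr

# Forward the text box input to get_slots and render the returned dict as JSON.
demo = gr.Interface(fn=get_slots, inputs="text", outputs="json", title="Slot extraction")

if __name__ == "__main__":
    demo.launch()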