|
import torch |
|
from peft import PeftModel, PeftConfig |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
|
|
|
|
# Experiment tag. It selects the adapter checkpoint directory below and is
# also used by the dataset paths later in this script.
exp_name = "_mid_ascii"

# Directory containing the fine-tuned PEFT adapter and its configuration.
peft_model_id = f"finetuned_model/results{exp_name}2"

# Upper bound on the number of new tokens generated per example.
max_target_length = 128

# The adapter config names the base checkpoint the adapter belongs to.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base seq2seq model with 8-bit weights and automatic device placement.
model = AutoModelForSeq2SeqLM.from_pretrained(
    config.base_model_name_or_path,
    load_in_8bit=True,
    device_map="auto",
)

# A tokenizer has no weights to place on a device, so `device_map` is not
# passed here (it was previously forwarded as an unused extra keyword).
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Wrap the base model with the fine-tuned adapter and switch to eval mode.
model = PeftModel.from_pretrained(model, peft_model_id, device_map="auto")
model.eval()

print("Peft model loaded")
|
|
|
from datasets import load_dataset |
|
from random import randrange |
|
|
|
|
|
|
|
# Resolve the train/test JSONL files for the current experiment tag.
datapath = 'LTL_datasets/collect/'
data_files = {
    "train": datapath + "ltl_eng_train" + exp_name + ".jsonl",
    "test": datapath + "ltl_eng_test" + exp_name + ".jsonl",
}
dataset = load_dataset("json", data_files=data_files)
print(dataset)

# Draw one test record at random as a quick check of the loaded model.
test_split = dataset["test"]
sample = test_split[randrange(len(test_split))]

# Tokenize the record's "natural" field and move the ids to the GPU.
encoded = tokenizer(sample["natural"], return_tensors="pt", truncation=True)
input_ids = encoded.input_ids.cuda()

# Sampling is enabled (top_p=0.9), so the generated text can vary per run.
outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=max_target_length,
    do_sample=True,
    top_p=0.9,
)

print(f"input sentence: {sample['natural']}\n{'---'* 20}")

decoded = tokenizer.batch_decode(
    outputs.detach().cpu().numpy(),
    skip_special_tokens=True,
)
print(f"summary:\n{decoded[0]}")
|
|
|
|
|
import evaluate |
|
import numpy as np |
|
from datasets import load_from_disk |
|
from tqdm import tqdm |
|
|
|
|
|
# ROUGE scorer; `metric.compute(...)` is called once all predictions and
# references have been collected.
metric = evaluate.load("rouge")
|
|
|
def evaluate_peft_model(sample, max_target_length=128):
    """Generate a prediction for one tokenized example and decode all three texts.

    Relies on the module-level ``model`` and ``tokenizer``.

    Args:
        sample: Mapping with ``"input_ids"`` and ``"labels"`` entries.
            ``input_ids`` is used as a single un-batched tensor: it is
            unsqueezed into a batch of one and moved to the GPU.
        max_target_length: Maximum number of new tokens to generate.

    Returns:
        A tuple ``(prediction, labels, input_sentence)`` of decoded strings:
        the generated text, the reference text and the source text.
    """
    # Sampled generation (top_p=0.9) for a batch containing just this example.
    outputs = model.generate(
        input_ids=sample["input_ids"].unsqueeze(0).cuda(),
        do_sample=True,
        top_p=0.9,
        max_new_tokens=max_target_length,
    )
    prediction = tokenizer.decode(
        outputs[0].detach().cpu().numpy(),
        skip_special_tokens=True,
    )

    # -100 is not a decodable token id; replace it with the pad id first.
    # (Presumably -100 is the loss-masking value from preprocessing - confirm.)
    labels = np.where(sample['labels'] != -100, sample['labels'], tokenizer.pad_token_id)
    labels = tokenizer.decode(labels, skip_special_tokens=True)

    # Decode the id sequence as a whole. `batch_decode` on this un-batched
    # sequence would treat every id as its own sequence, and joining those
    # pieces with spaces splits words wherever the tokenizer produced
    # several sub-word tokens.
    input_sentence = tokenizer.decode(
        sample["input_ids"].detach().cpu().numpy(),
        skip_special_tokens=True,
    )
    print("input sentence: {}\n{}".format(input_sentence, '---' * 20))

    print(f"pre_LTL:\n{prediction}\nexp_LTL:\n{labels}")
    return prediction, labels, input_sentence
|
|
|
|
|
# Evaluation split stored on disk for this experiment; `with_format("torch")`
# makes each row come back as torch tensors.
test_dataset = load_from_disk("data/eval"+exp_name+'/').with_format("torch")

# Run the model over every example, keeping source, prediction and reference.
predictions, references, input_sentence = [], [], []
for sample in tqdm(test_dataset):
    pred, ref, src = evaluate_peft_model(sample)
    input_sentence.append(src)
    predictions.append(pred)
    references.append(ref)

# Corpus-level ROUGE over all collected prediction/reference pairs.
rogue = metric.compute(predictions=predictions, references=references, use_stemmer=True)

# `.2f` prints two decimals. The previous spec `2f` set a minimum width of 2
# and left the precision at the default six decimals.
print(f"Rogue1: {rogue['rouge1']* 100:.2f}%")
print(f"rouge2: {rogue['rouge2']* 100:.2f}%")
print(f"rougeL: {rogue['rougeL']* 100:.2f}%")
print(f"rougeLsum: {rogue['rougeLsum']* 100:.2f}%")

# One row per example, columns in the order: input, prediction, reference.
eval_output = np.array([input_sentence, predictions, references]).T

import pandas as pd

eval_output = pd.DataFrame(eval_output)
# Written next to the adapter checkpoint, in a file named "output".
eval_output.to_csv(peft_model_id+'/output')
|
|
|
|
|
|
|
|
|
|