from transformers import AutoModelForCausalLM, AutoTokenizer
# device = "cuda" # the device to load the model onto
# from huggingface_hub import login
# login()
import json
import numpy as np
import sys, os
from datasets import load_dataset
import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,
                          pipeline, logging, TrainerCallback)
from peft import LoraConfig, PeftConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer
from accelerate import infer_auto_device_map, init_empty_weights
import wandb
from datasets import concatenate_datasets
# sys.path.append('../../../')
# sys.path.append('../../')
# sys.path.append('../')
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ['CUDA_VISIBLE_DEVICES'] = "5,6,7"
# device = torch.device("cuda:0-6" if torch.cuda.is_available() else "cpu")
sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
# import utils.util as util

# Load dataset from the hub
# dataset = load_dataset("samsum")
device = 'cuda'
np.random.seed(42)

output_dir = os.path.join(os.path.dirname(__file__), '../')
datapath = os.path.join(os.path.dirname(__file__), '../NL2TL-dataset/collect2')
exp_name = "_mid_ascii_0327_eos_2"

# Build a dictionary mapping each raw LTL formula to its candidate logic explanations.
explainer_files = ['LTLexplain_0.json', 'LTLexplain_1.json', 'LTLexplain_2.json', 'LTLexplain_3.json']
explainer_dic = {}
for path in explainer_files:
    with open(os.path.join(datapath, path)) as f:
        LTLlist = json.load(f)
    for key in LTLlist.keys():
        if isinstance(LTLlist[key], dict):
            if key not in explainer_dic:
                explainer_dic[key] = []
            explainer_dic[key].append(LTLlist[key]['translate'])
            sp = LTLlist[key]['explain'].split("means that")
            if len(sp) > 1:
                explainer_dic[key].append(sp[1])

base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=getattr(torch, "float16")
)
# NOTE: this 8-bit config overrides the 4-bit config above, so the run actually uses 8-bit quantization.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    # llm_int8_threshold=200.0
    # bnb_4bit_use_double_quant = False,
    # bnb_4bit_quant_type = 'nf4',
    # bnb_4bit_compute_dtype = getattr(torch, "float16")
)

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
# os.environ['CUDA_VISIBLE_DEVICES']='0'
device_map = "auto"
# torch.cuda.set_device(7)
# device_map={'':torch.cuda.current_device()}
# device_map = {'':'cuda:7'}

# model_dir is the path or name of the model
# config = AutoConfig.from_pretrained(base_model_name, trust_remote_code=True)
# with init_empty_weights():
#     base_model = AutoModelForCausalLM.from_pretrained(
#         base_model_name,
#         from_tf=bool(".ckpt" in base_model_name),
#         quantization_config=bnb_config,
#         device_map=device_map,
#         trust_remote_code=True,
#         use_auth_token=True
#     )
# map_list = {5:"15GB", 6:"15GB", 7:"15GB"}  # memory limit for each GPU index
# map_list = {7:"15GB",}  # memory limit for each GPU index
# no_split_modules = base_model._no_split_modules
# device_map = infer_auto_device_map(base_model, max_memory=map_list, no_split_module_classes=no_split_modules)

dataset = load_dataset(
    "json",
    data_files={
        "train": os.path.join(datapath, "ltl_eng_train_mid_ascii_gptAuged.jsonl"),
        "test": os.path.join(datapath, "ltl_eng_test_mid_ascii_gptAuged.jsonl"),
    },
)
print(dataset)

# tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)  # , add_eos_token=True, trust_remote_code=True)
# NOTE: nothing states clearly whether add_eos_token is needed, but without it generation keeps going
# until max_new_tokens is reached; with add_eos_token=True training always failed, and enabling it made
# the model generate something else.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
# print(tokenizer.eos_token_id)  # 2
# print(tokenizer.bos_token_id)  # 1
# print(tokenizer._convert_token_to_id(tokenizer.bos_token))
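# Hedged sanity check (not in the original script): with the settings above the Mistral tokenizer
# should not append the EOS token on its own, so any </s> in the training text comes from the chat
# template applied below. This probe just makes that behaviour visible before training.
_probe_ids = tokenizer("hello world")["input_ids"]
print("tokenizer appends EOS by itself:", _probe_ids[-1] == tokenizer.eos_token_id)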

def preprocess_function(sample, padding="max_length"):
    # add prefix to the input for t5
    # print(sample[0])
    inputs = [
        f"""### Instruction:
translate natural description to linear temporal logic, first translate into a logical way, and then translate into linear temporal logic, pay specific attention to brackets '()'
### Natural Language Task:
{sample['natural'][i].strip()}
### Logic Translation:
{explainer_dic[sample['raw_ltl'][i].strip()][np.random.randint(0, len(explainer_dic[sample['raw_ltl'][i].strip()]))]}
### linear temporal logic:
{sample['raw_ltl'][i].strip()}
""".lower()
        # NOTE: it seems the EOS is needed; the BOS is not (it is added automatically)
        for i in range(len(sample['natural']))]
    # inputs = ["## [instruction]: translate natural description in to LTL: ### [natural language]:" + sample['natural'][i]+'### [LTL]:'+sample['raw_ltl'][i] for i in (range(len(sample['natural'])))]
    sample["complete_text"] = inputs
    return sample

# method1
# tokenized_dataset = dataset.map(preprocess_function, batched=True)

# method2
def preprocess_function2(sample, padding="max_length"):
    # build one chat-formatted training string per example: the user turn carries the natural-language
    # task, the assistant turn carries a sampled logic explanation plus the target LTL formula
    # print(sample[0])
    inputs = [
        tokenizer.apply_chat_template(
            [
                {"role": "user", "content": "translate natural description to linear temporal logic, first translate into a logical expression, and then translate into linear temporal logic, please pay specific attention to logic grammar, the natural language task is {}".format(sample['natural'][i].strip())},
                {"role": "assistant", "content": "logic expression is {}, and LTL is {} .".format(
                    explainer_dic[sample['raw_ltl'][i].strip()][np.random.randint(0, len(explainer_dic[sample['raw_ltl'][i].strip()]))],
                    sample['raw_ltl'][i].strip()
                )},
                # {"role": "user", "content": " pay specific attention to brackets '()', linear temporal logic is"},
                # {"role": "assistant", "content": "LTL is {} .".format(sample['raw_ltl'][i].strip())}
            ],
            tokenize=False)
        # NOTE: it seems the EOS is needed; the BOS is not (it is added automatically)
        for i in range(len(sample['natural']))]
    # inputs = ["## [instruction]: translate natural description in to LTL: ### [natural language]:" + sample['natural'][i]+'### [LTL]:'+sample['raw_ltl'][i] for i in (range(len(sample['natural'])))]
    sample["complete_text"] = inputs
    return sample

tokenized_dataset = dataset.map(preprocess_function2, batched=True)
print(f"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}")

# save datasets to disk for later easy loading
# tokenized_dataset["train"].save_to_disk("data/train"+exp_name)
# tokenized_dataset["test"].save_to_disk("data/eval"+exp_name)
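# Hedged sketch (not in the original script): print one formatted training sample so the chat-template
# output ([INST] markers and </s> placement) can be eyeballed before training; "complete_text" is the
# column created by preprocess_function2 above.
print(tokenized_dataset['train'][0]['complete_text'])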

class PeftSavingCallback(TrainerCallback):
    # keep only the PEFT adapter weights in each checkpoint and drop the full model dump
    def on_save(self, args, state, control, **kwargs):
        checkpoint_path = os.path.join(args.output_dir, f"checkpoint-{state.global_step}")
        kwargs["model"].save_pretrained(checkpoint_path)
        if "pytorch_model.bin" in os.listdir(checkpoint_path):
            os.remove(os.path.join(checkpoint_path, "pytorch_model.bin"))

callbacks = [PeftSavingCallback]

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05,
    r=128,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"]
)

training_arguments = TrainingArguments(
    output_dir=output_dir,
    logging_dir=os.path.join(output_dir, "logs"),
    per_device_train_batch_size=1,
    num_train_epochs=3,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    save_strategy='epoch',
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.05,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="wandb",
    evaluation_strategy="epoch",
    do_eval=True,
    run_name=base_model_name + exp_name,
    disable_tqdm=False
)

# note: training checkpoints still go to the parent output_dir captured by TrainingArguments above;
# this subdirectory is where the final adapter and tokenizer are saved
output_dir = os.path.join(output_dir, "mistral7b" + exp_name + 'aug1_quat8')

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    from_tf=bool(".ckpt" in base_model_name),
    quantization_config=bnb_config,
    device_map=device_map,
    trust_remote_code=True,
    use_auth_token=True
)
base_model.config.use_cache = False
# More info: https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1
base_model.gradient_checkpointing_enable()
base_model = prepare_model_for_kbit_training(base_model)
base_model = get_peft_model(base_model, peft_config)
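# Hedged sketch (not in the original script): report how many parameters the LoRA adapter actually
# trains; print_trainable_parameters() is provided by PEFT on the model returned by get_peft_model above.
base_model.print_trainable_parameters()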

trainer = SFTTrainer(
    model=base_model,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    peft_config=peft_config,
    dataset_text_field="complete_text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    callbacks=callbacks,
    packing=False,
)

wandb.login()
trainer.train()
trainer.model.save_pretrained(output_dir)
# trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
wandb.finish()

# check
print('model dir', output_dir)

from peft import AutoPeftModelForCausalLM

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoPeftModelForCausalLM.from_pretrained(
    output_dir,
    from_tf=bool(".ckpt" in output_dir),
    quantization_config=bnb_config,
    device_map=device_map,
    trust_remote_code=True,
    use_auth_token=True
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
print(tokenizer.default_chat_template)

def evaluate_model(input_text):
    # plain-prompt variant (kept for reference; evaluate_model2 below is the one actually used)
    input_text = f"""### Instruction:
translate natural description to linear temporal logic, first translate into a logical way, and then translate into linear temporal logic, pay specific attention to brackets '()'
### Natural Language Task:
{input_text}""".lower()
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    print(inputs)
    outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), attention_mask=inputs["attention_mask"].to("cuda"), max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def evaluate_model2(input_text):
    messages = [
        {"role": "user", "content": "translate natural description to linear temporal logic, first translate into a logical way, and then translate into linear temporal logic, pay specific attention to brackets '()', natural language task: {}".format(input_text)},
    ]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
    outputs = model.generate(encodeds, max_new_tokens=512)  # , pad_token_id=tokenizer.eos_token_id)
    # input_text = f"""### Instruction:
    # translate natural description to linear temporal logic, first translate into a logical way, and then translate into linear temporal logic, pay specific attention to brackets '()'
    # ### Natural Language Task:
    # {input_text}""".lower()
    # inputs = tokenizer(input_text, return_tensors="pt").to(device)
    # print(inputs)
    # outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), attention_mask=inputs["attention_mask"].to("cuda"), max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# if __name__=='__main__':
import evaluate
from datasets import load_from_disk
from tqdm import tqdm

# Metric
metric = evaluate.load("rouge")

# load test dataset from disk
# test_dataset = load_from_disk("data/eval"+exp_name+'/').with_format("torch")

# run predictions
# this can take ~45 minutes
import re

# extract the formula after "LTL is", matching the assistant format used in preprocess_function2
pattern = re.compile(r"LTL is ([\S ]*)")
predictions, references, input_sentence, output_sentence = [], [], [], []
for idx in range(len(tokenized_dataset['test']['natural'])):
    # print(sample)
    nl = tokenized_dataset['test']['natural'][idx]
    p = evaluate_model2(nl)
    # print(p,l)
    input_sentence.append(nl)
    transLTL = pattern.findall(p)
    print(p)
    if transLTL[0][-1] == '.':
        transLTL[0] = transLTL[0][:-1].strip()
    else:
        transLTL[0] = transLTL[0].strip()
    predictions.append(transLTL[0])
    output_sentence.append(p)
    references.append(tokenized_dataset['test']['raw_ltl'][idx].strip())
    print(input_sentence[-1], '\nout::\n', output_sentence[-1], '\npre::\n', predictions[-1], '\nref::\n', references[-1], '\n', '-' * 20, '\n')

# compute metric
rouge = metric.compute(predictions=predictions, references=references, use_stemmer=True)

# print results
print(f"rouge1: {rouge['rouge1'] * 100:.2f}%")
print(f"rouge2: {rouge['rouge2'] * 100:.2f}%")
print(f"rougeL: {rouge['rougeL'] * 100:.2f}%")
print(f"rougeLsum: {rouge['rougeLsum'] * 100:.2f}%")

import pandas as pd
eval_output = pd.DataFrame(np.array([input_sentence, predictions, references]).T,
                           columns=['natural', 'prediction', 'reference'])
eval_output.to_csv(os.path.join(output_dir, 'output'))
exit()

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
model_inputs = encodeds.to(device)
model.to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])