NL2HLTL / NL2HLTLTranslator /data_augmentation /GPTbasedAug.py

update readme

d834d9d 7 months ago

4.18 kB

	import json
	import re
	import sys,os
	import numpy as np
	# sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
	from ... import utils as util
	import time
	# Half-open window [l, r) of loop positions handled by this run.
	# Per the original note, positions 0..40 have already been added.
	l = 40
	r = 2000
	# Batch size (described in the original note as "number of re describe codes").
	batch_size = 20


	# Directory that holds the cleaned source files and receives the augmented output.
	dataPath = "LTL_datasets/collect/"

	# Input files.
	data_eng_path = os.path.join(dataPath, "Cleaned_ENG.txt")
	data_ltl_path = os.path.join(dataPath, "Cleaned_LTL.txt")

	# Output files.
	data_eng_output_path = os.path.join(dataPath, "eng_gpt_auged2.txt")
	data_ltl_output_path = os.path.join(dataPath, "ltl_mid_order_ascii_gpt_auged2.txt")
	data_src_output_path = os.path.join(dataPath, "idxsrc_gpt_auged2.txt")

	def _read_lines(path):
	    """Return every line of the text file at *path*, line endings included."""
	    # The context manager closes the file on exit, so no explicit close() is needed.
	    with open(path) as txt:
	        return txt.readlines()


	# One array entry per line of each input file. The rest of the script indexes
	# both arrays with the same position.
	ltl = np.array(_read_lines(data_ltl_path))
	eng = np.array(_read_lines(data_eng_path))

	# Report how many LTL lines were loaded.
	print(len(ltl))

	# Client object used later to send chat messages to GPT.
	# NOTE(review): presumably JSONlog/exp_PATH make it log exchanges as JSON under
	# dataPath — confirm against utils.GPTinterface.
	GPTinterface=util.GPTinterface(JSONlog=True,exp_PATH=dataPath)

	# NOTE(review): `random` is not referenced anywhere in the visible part of this script.
	import random
	# Fixed seed for NumPy's global generator, so the shuffle below is repeatable.
	np.random.seed(42)

	# Positions 0..len(ltl)-1 in shuffled order; the script walks this array to pick samples.
	idx=np.arange(len(ltl))
	np.random.shuffle(idx)

	# Chat messages sent with every GPT request.
	#   messages[0] - system prompt: example sentences whose language style GPT is asked to imitate.
	#   messages[1] - user message: the content below is only a placeholder; it is overwritten
	#                 with the batch prompt before each request is sent.
	# NOTE(review): the system prompt contains typos ("Herer", "completedbefore") and a stray
	# double quote at the end of the "P07 must be completed before P17, P15, and P02" example.
	# They are part of the text sent to the model, so they are left untouched here.
	messages=[
	{
	"role": "system",
	"content": """1. Herer are some one sentence examples in a way that is normally used to interpret the safe or co-safe property in linear temporal logic, please remember and imitate the language style in the examples below
	P02 and P03 can occur independently and either may be executed without affecting the other.
	P07 must precede P17, which in turn should precede P15, ensuring that P07 happens before P17 and P15 is done last.
	Always (P08 precedes P09) and Eventually (P08 is executed) and Eventually (P09 is executed).
	Globally, P02 should be completed before P03 eventually starts.
	Eventually, P08 and P09 should both be completed, and they can be done in any order.
	P02 and P04 can be executed concurrently, while P03 can only be executed once P02 has been completed.
	P07 must be completed before P17, P15, and P02 can be started. P17 must be completed before P15 and P02 can be started. P15 must be completedbefore P02 can be started."
	P08 is a prerequisite for P09, P09 is a prerequisite for P10, and P10 is a prerequisite for P16.
	P06 must be completed before P14, P14 must be completed before P11, and P11 must be completed before P12.
	P02 and P05 are always possible to be executed, while the possibility of executing P03 and P04 is contingent upon the completion of P02.
	P07 must be executed, and only after P07 is completed can P17 be executed, and only after P17 is completed can P15 be executed.
	P08 can be executed independently, after which P09 can be executed.
	P06 must be completed first, followed by P14, then P11, and finally P12.
	P19 is always executed first, followed by P13, then P18, and finally P05.
	P15 and P14 can be executed independently, and both should eventually be completed.
	P07 must be completed before P17, and P17 must be completed before P15
	P06 must be completed before P14 begins, and P14 must be completed before P11 begins"""
	},{
	"role":"user",
	"content":"first go to P01 and then go to P20, always avoiding P02"
	}
	]
	# Fixed header that opens every batch prompt sent as the user message.
	prompt_header="B. re describe this instruction using the style above\n"
	# Captures the sentence from each numbered answer line, e.g. "07. <sentence>\n".
	# Raw string: "\." and "\S" are regex escapes, not Python string escapes.
	pattern=re.compile(r"[0-9]{2}\. ([\S ]*)\n")


	def _flush_batch(prompt,ltl_lines,src_lines,expected):
	    """Send one batch prompt to GPT and append the results to the output files.

	    Args:
	        prompt: Header followed by the numbered English sentences of the batch.
	        ltl_lines: LTL formulas of the batch, one per line, in prompt order.
	        src_lines: Dataset positions of the batch, one per line, in prompt order.
	        expected: Number of sentences contained in the batch.

	    The batch is written only when the reply contains exactly ``expected``
	    numbered lines. Any other count would leave the three output files
	    misaligned, so the whole batch is dropped and reported instead.
	    """
	    messages[1]["content"]=prompt
	    GPTreturn=GPTinterface.communicate(messages=messages)
	    # A newline is appended so the last answer line also matches the pattern.
	    reDescription=pattern.findall(GPTreturn+'\n')
	    if len(reDescription)==expected:
	        with open(data_eng_output_path,"a") as f:
	            f.writelines(sentence+'\n' for sentence in reDescription)
	        with open(data_ltl_output_path,"a") as f:
	            f.write(ltl_lines)
	        with open(data_src_output_path,"a") as f:
	            f.write(src_lines)
	    else:
	        print("dropped batch: expected {} re-descriptions, got {}".format(expected,len(reDescription)))
	    # Random pause of up to 5 seconds after each request.
	    time.sleep(np.random.random()*5)


	# State of the batch currently being assembled.
	input_content=prompt_header
	input_LTL=""
	input_idx=""
	count=0

	# Stop at the end of the data when r is larger than the dataset, instead of
	# failing with IndexError.
	for i in range(l,min(r,len(idx))):
	    # The sample is added before the flush check, so no sample is skipped at a
	    # batch boundary.
	    count+=1
	    # Normalise to exactly one trailing newline per entry; the last line of a
	    # source file may lack one.
	    input_content+="{:0>2d}. {}\n".format(count,eng[idx[i]].rstrip('\n'))
	    input_LTL+="{}\n".format(ltl[idx[i]].rstrip('\n'))
	    input_idx+="{}\n".format(idx[i])
	    if count>=batch_size:
	        _flush_batch(input_content,input_LTL,input_idx,count)
	        input_content=prompt_header
	        input_LTL=""
	        input_idx=""
	        count=0

	# Send whatever is left when the range is not a multiple of batch_size.
	if count>0:
	    _flush_batch(input_content,input_LTL,input_idx,count)