tt-dart committed on Nov 25, 2024

Commit

d834d9d

1 Parent(s): e91a58c

update readme

Browse files

Files changed (34) hide show

.gitignore +2 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/data-00000-of-00001.arrow +0 -0
NL2HLTLTranslator/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json +3 -0
NL2HLTLTranslator/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/state.json +3 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/data-00000-of-00001.arrow +0 -0
NL2HLTLTranslator/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json +3 -0
NL2HLTLTranslator/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/state.json +3 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_dp2_patch.py +0 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune.py +0 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune_ver2.py +0 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune_ver3_qlora.py +0 -0
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_test.py +1 -1
{finetune → NL2HLTLTranslator}/Llama2_13b/llama_test.py +0 -0
{finetune → NL2HLTLTranslator}/MIT_NL2TL/NL2TL.py +0 -0
{finetune → NL2HLTLTranslator}/T5_XXL/t5_lora_evaluate.py +0 -0
{finetune → NL2HLTLTranslator}/T5_XXL/t5_lora_fintune.py +0 -0
{finetune → NL2HLTLTranslator}/T5_XXL/t5_realtime_evaluate.py +0 -0
{finetune → NL2HLTLTranslator}/__init__.py +0 -0
{finetune → NL2HLTLTranslator}/data_augmentation/GPTbasedAug.py +0 -0
{finetune → NL2HLTLTranslator}/data_augmentation/dataset_creator.py +0 -0
NL2HLTLTranslator/fastapi_server.py +398 -0
{finetune → NL2HLTLTranslator}/mistral7b/finetune.py +2 -2
{finetune → NL2HLTLTranslator}/mistral7b/prediction.py +6 -6
{finetune → NL2HLTLTranslator}/mistral7b/test.py +0 -0
{finetune → NL2HLTLTranslator}/realtime_run.py +0 -0
{finetune → NL2HLTLTranslator}/test.py +0 -0
NL2HLTLTranslator/utils/util.py +449 -0
NL2TL-dataset/collect2/getUniqueLTL.py +2 -2
README.md +13 -1
finetune/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json +0 -69
finetune/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/state.json +0 -13
finetune/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json +0 -69
finetune/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/state.json +0 -13
setup.py +12 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.egg-info
2	+ *.pyc

{finetune → NL2HLTLTranslator}/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/data-00000-of-00001.arrow RENAMED Viewed

File without changes

NL2HLTLTranslator/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abd1c5e1bcff098b3f091b0b6e7161643fe38547c1b323eba17dd050461845c5
+size 1370

NL2HLTLTranslator/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6caef2d70554a357d04d62d5e8a5c1f2ead2c9eb245f2688f1ea714b858e5a95
+size 249

{finetune → NL2HLTLTranslator}/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/data-00000-of-00001.arrow RENAMED Viewed

File without changes

NL2HLTLTranslator/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abd1c5e1bcff098b3f091b0b6e7161643fe38547c1b323eba17dd050461845c5
+size 1370

NL2HLTLTranslator/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45e111fa90561c16eaea253bbb292557a4b7c17acb1f152096af221bc775ddcb
+size 250

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_dp2_patch.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune_ver2.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_fintune_ver3_qlora.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_lora_test.py RENAMED Viewed

@@ -22,7 +22,7 @@ os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
 class Llama_NL2TL_translator():
     def __init__(self,
-                 output_dir = "/home/icl-mill19/xsj/model_weight",
                  tuned_model_name="llama2_13b__mid_asciiaug1",
                 #  CUDA_device='0',
                  quat=True) -> None:

 class Llama_NL2TL_translator():
     def __init__(self,
+                 output_dir = "path/to/model_weight",
                  tuned_model_name="llama2_13b__mid_asciiaug1",
                 #  CUDA_device='0',
                  quat=True) -> None:

{finetune → NL2HLTLTranslator}/Llama2_13b/llama_test.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/MIT_NL2TL/NL2TL.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/T5_XXL/t5_lora_evaluate.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/T5_XXL/t5_lora_fintune.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/T5_XXL/t5_realtime_evaluate.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/__init__.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/data_augmentation/GPTbasedAug.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/data_augmentation/dataset_creator.py RENAMED Viewed

File without changes

NL2HLTLTranslator/fastapi_server.py ADDED Viewed

	@@ -0,0 +1,398 @@

+"""
+modified by xsj
+This script implements an API for the ChatGLM3-6B model,
+formatted similarly to OpenAI's API (https://platform.openai.com/docs/api-reference/chat).
+It's designed to be run as a web server using FastAPI and uvicorn,
+making the ChatGLM3-6B model accessible through OpenAI Client.
+Key Components and Features:
+- Model and Tokenizer Setup: Configures the model and tokenizer paths and loads them.
+- FastAPI Configuration: Sets up a FastAPI application with CORS middleware for handling cross-origin requests.
+- API Endpoints:
+  - "/v1/models": Lists the available models, specifically ChatGLM3-6B.
+  - "/v1/chat/completions": Processes chat completion requests with options for streaming and regular responses.
+  - "/v1/embeddings": Processes Embedding request of a list of text inputs.
+- Token Limit Caution: In the OpenAI API, 'max_tokens' is equivalent to HuggingFace's 'max_new_tokens', not 'max_length'.
+For instance, setting 'max_tokens' to 8192 for a 6b model would result in an error due to the model's inability to output
+that many tokens after accounting for the history and prompt tokens.
+- Stream Handling and Custom Functions: Manages streaming responses and custom function calls within chat responses.
+- Pydantic Models: Defines structured models for requests and responses, enhancing API documentation and type safety.
+- Main Execution: Initializes the model and tokenizer, and starts the FastAPI app on the designated host and port.
+Note:
+    This script doesn't include the setup for special tokens or multi-GPU support by default.
+    Users need to configure their special tokens and can enable multi-GPU support as per the provided instructions.
+    Embedding Models only support in One GPU.
+"""
+import os
+import time
+import tiktoken
+import torch
+import uvicorn
+from fastapi import FastAPI, HTTPException, Response
+from fastapi.middleware.cors import CORSMiddleware
+from contextlib import asynccontextmanager
+from typing import List, Literal, Optional, Union
+from loguru import logger
+from pydantic import BaseModel, Field
+from transformers import AutoTokenizer, AutoModel
+# from utils import process_response, generate_chatglm3, generate_stream_chatglm3
+from sentence_transformers import SentenceTransformer
+from sse_starlette.sse import EventSourceResponse
+# from NL2HLTLtaskPlanner.finetune.Llama2_13b.llama_lora_test import Llama_NL2TL_translator as NL2TL_translator
+from NL2HLTLTranslator.mistral7b.prediction import Mistral_NL2TL_translator as NL2TL_translator
+# Override the default ping interval used by server-sent-event responses
+# (presumably seconds between keep-alive pings -- confirm against sse_starlette).
+EventSourceResponse.DEFAULT_PING_INTERVAL = 1000
+# Default model location used by the __main__ guard: the parent directory of this file,
+# combined with the name of the tuned model to load.
+output_dir = os.path.join(os.path.dirname(__file__),"../")
+tuned_model_name="mistral7b_quat8"
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    yield
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+# Application object; `lifespan` supplies the shutdown clean-up.
+app = FastAPI(lifespan=lifespan)
+# CORS is fully open: any origin, method and header is accepted, with credentials allowed.
+# NOTE(review): tighten `allow_origins` if this server is reachable from untrusted networks.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# One entry of the model list returned by "/v1/models".
+class ModelCard(BaseModel):
+    id: str
+    object: str = "model"
+    # Creation time defaults to the moment the card is instantiated (Unix seconds).
+    created: int = Field(default_factory=lambda: int(time.time()))
+    owned_by: str = "owner"
+    root: Optional[str] = None
+    parent: Optional[str] = None
+    permission: Optional[list] = None
+# Response body of "/v1/models": a list wrapper around ModelCard entries.
+class ModelList(BaseModel):
+    object: str = "list"
+    data: List[ModelCard] = []
+# Optional function-call payload attached to ChatMessage / DeltaMessage.
+class FunctionCallResponse(BaseModel):
+    name: Optional[str] = None
+    arguments: Optional[str] = None
+class ChatMessage(BaseModel):
+    role: Literal["user", "assistant", "system", "function"]
+    content: str = None
+    name: Optional[str] = None
+    function_call: Optional[FunctionCallResponse] = None
+# Incremental message fragment carried by a streaming choice; every field is optional.
+class DeltaMessage(BaseModel):
+    role: Optional[Literal["user", "assistant", "system"]] = None
+    content: Optional[str] = None
+    function_call: Optional[FunctionCallResponse] = None
+## for Embedding
+# Request body of "/v1/embeddings": the texts to embed and the model name to echo back.
+class EmbeddingRequest(BaseModel):
+    input: List[str]
+    model: str
+# Token accounting reported in EmbeddingResponse.usage; all three counts are required.
+class CompletionUsage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+# Response body of "/v1/embeddings".
+class EmbeddingResponse(BaseModel):
+    data: list
+    model: str
+    object: str
+    usage: CompletionUsage
+# for ChatCompletionRequest
+# Token accounting reported in ChatCompletionResponse.usage; every count defaults to 0.
+class UsageInfo(BaseModel):
+    prompt_tokens: int = 0
+    total_tokens: int = 0
+    completion_tokens: Optional[int] = 0
+# Request body of "/v1/chat/completions"; only `model` and `messages` are required.
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    temperature: Optional[float] = 0.8
+    top_p: Optional[float] = 0.8
+    # When omitted, the chat handler substitutes 1024.
+    max_tokens: Optional[int] = None
+    stream: Optional[bool] = False
+    tools: Optional[Union[dict, List[dict]]] = None
+    repetition_penalty: Optional[float] = 1.1
+# A complete (non-streaming) answer choice.
+class ChatCompletionResponseChoice(BaseModel):
+    index: int
+    message: ChatMessage
+    finish_reason: Literal["stop", "length", "function_call"]
+# A streaming answer choice; `finish_reason` has no default but may be None.
+class ChatCompletionResponseStreamChoice(BaseModel):
+    delta: DeltaMessage
+    finish_reason: Optional[Literal["stop", "length", "function_call"]]
+    index: int
+# Response envelope shared by full responses ("chat.completion") and stream chunks
+# ("chat.completion.chunk").
+class ChatCompletionResponse(BaseModel):
+    model: str
+    id: str
+    object: Literal["chat.completion", "chat.completion.chunk"]
+    choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
+    # Defaults to the moment the response object is created (Unix seconds).
+    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
+    usage: Optional[UsageInfo] = None
+@app.get("/health")
+async def health() -> Response:
+    """Health check."""
+    # Always answers with an empty HTTP 200; no model state is inspected.
+    return Response(status_code=200)
+@app.post("/v1/embeddings", response_model=EmbeddingResponse)
+async def get_embeddings(request: EmbeddingRequest):
+    embeddings = [embedding_model.encode(text) for text in request.input]
+    embeddings = [embedding.tolist() for embedding in embeddings]
+    def num_tokens_from_string(string: str) -> int:
+        """
+        Returns the number of tokens in a text string.
+        use cl100k_base tokenizer
+        """
+        encoding = tiktoken.get_encoding('cl100k_base')
+        num_tokens = len(encoding.encode(string))
+        return num_tokens
+    response = {
+        "data": [
+            {
+                "object": "embedding",
+                "embedding": embedding,
+                "index": index
+            }
+            for index, embedding in enumerate(embeddings)
+        ],
+        "model": request.model,
+        "object": "list",
+        "usage": CompletionUsage(
+            prompt_tokens=sum(len(text.split()) for text in request.input),
+            completion_tokens=0,
+            total_tokens=sum(num_tokens_from_string(text) for text in request.input),
+        )
+    }
+    return response
+@app.get("/v1/models", response_model=ModelList)
+async def list_models():
+    # Returns a list with one fixed model card.
+    # NOTE(review): the id is still "chatglm3-6b" although `run()` loads the NL2TL
+    # translator -- confirm whether clients rely on this value before changing it.
+    model_card = ModelCard(
+        id="chatglm3-6b"
+    )
+    return ModelList(
+        data=[model_card]
+    )
+count=0
+@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
+async def create_chat_completion(request: ChatCompletionRequest):
+    # Non-streaming chat endpoint: hands the content of the FIRST request message to
+    # `LLM.translate` and wraps the returned value as a single assistant choice.
+    # `LLM` is the module-level translator assigned in `run()`.
+    # NOTE(review): `model` and `tokenizer` are declared global but never read here.
+    global model, tokenizer, LLM
+    # Reject empty conversations and conversations that already end with an assistant turn.
+    if len(request.messages) < 1 or request.messages[-1].role == "assistant":
+        raise HTTPException(status_code=400, detail="Invalid request")
+    # Only `messages` is consumed below; the sampling parameters are collected for the
+    # debug log and are not forwarded to the translator.
+    gen_params = dict(
+        messages=request.messages,
+        temperature=request.temperature,
+        top_p=request.top_p,
+        max_tokens=request.max_tokens or 1024,
+        echo=False,
+        stream=request.stream,
+        repetition_penalty=request.repetition_penalty,
+        tools=request.tools,
+    )
+    logger.debug(f"==== request ====\n{gen_params}")
+    # The streaming / tool-call branch below is disabled, so `request.stream` is
+    # ignored and a complete response is always returned.
+    # if request.stream:
+    #     # Use the stream mode to read the first few characters, if it is not a function call, direct stream output
+    #     predict_stream_generator = predict_stream(request.model, gen_params)
+    #     output = next(predict_stream_generator)
+    #     if not contains_custom_function(output):
+    #         return EventSourceResponse(predict_stream_generator, media_type="text/event-stream")
+    #     # Obtain the result directly at one time and determine whether tools needs to be called.
+    #     logger.debug(f"First result output：\n{output}")
+    #     function_call = None
+    #     if output and request.tools:
+    #         try:
+    #             function_call = process_response(output, use_tool=True)
+    #         except:
+    #             logger.warning("Failed to parse tool call")
+    #     # CallFunction
+    #     if isinstance(function_call, dict):
+    #         function_call = FunctionCallResponse(**function_call)
+    #         """
+    #         In this demo, we did not register any tools.
+    #         You can use the tools that have been implemented in our `tools_using_demo` and implement your own streaming tool implementation here.
+    #         Similar to the following method:
+    #             function_args = json.loads(function_call.arguments)
+    #             tool_response = dispatch_tool(tool_name: str, tool_params: dict)
+    #         """
+    #         tool_response = ""
+    #         if not gen_params.get("messages"):
+    #             gen_params["messages"] = []
+    #         gen_params["messages"].append(ChatMessage(
+    #             role="assistant",
+    #             content=output,
+    #         ))
+    #         gen_params["messages"].append(ChatMessage(
+    #             role="function",
+    #             name=function_call.name,
+    #             content=tool_response,
+    #         ))
+    #         # Streaming output of results after function calls
+    #         generate = predict(request.model, gen_params)
+    #         return EventSourceResponse(generate, media_type="text/event-stream")
+    #     else:
+    #         # Handled to avoid exceptions in the above parsing function process.
+    #         generate = parse_output_text(request.model, output)
+    #         return EventSourceResponse(generate, media_type="text/event-stream")
+    # Here is the handling of stream = False
+    # print("gen_params['messages'][0].content",gen_params['messages'][0].content)
+    # NOTE(review): index 0 is the first message, not the latest one -- if a client
+    # sends a system prompt or earlier turns, that text is what gets translated. Confirm.
+    response=LLM.translate(gen_params['messages'][0].content)
+    # print('response',response)
+    # return response
+    # response = generate_chatglm3(model, tokenizer, gen_params)
+    # # Remove the first newline character
+    # if response["text"].startswith("\n"):
+    #     response["text"] = response["text"][1:]
+    # response["text"] = response["text"].strip()
+    # Usage is never filled in, so every count in the response stays at its default of 0.
+    usage = UsageInfo()
+    # function_call, finish_reason = None, "stop"
+    # if request.tools:
+    #     try:
+    #         function_call = process_response(response["text"], use_tool=True)
+    #     except:
+    #         logger.warning("Failed to parse tool call, maybe the response is not a tool call or have been answered.")
+    # if isinstance(function_call, dict):
+    #     finish_reason = "function_call"
+    #     function_call = FunctionCallResponse(**function_call)
+    # Tool calls are disabled: `function_call` is always None, so the isinstance test
+    # below always selects None.
+    function_call = None
+    message = ChatMessage(
+        role="assistant",
+        content=response,
+        function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
+    )
+    logger.debug(f"==== message ====\n{message}")
+    choice_data = ChatCompletionResponseChoice(
+        index=0,
+        message=message,
+        finish_reason='stop',
+    )
+    # task_usage = UsageInfo.model_validate(response["usage"])
+    # for usage_key, usage_value in task_usage.model_dump().items():
+    #     setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
+    # count+=1
+    return ChatCompletionResponse(
+        model=request.model,
+        id="",  # for open_source model, id is empty
+        choices=[choice_data],
+        object="chat.completion",
+        usage=usage
+    )
+async def parse_output_text(model_id: str, value: str):
+    """
+    Directly output the text content of value
+    :param model_id:
+    :param value:
+    :return:
+    """
+    choice_data = ChatCompletionResponseStreamChoice(
+        index=0,
+        delta=DeltaMessage(role="assistant", content=value),
+        finish_reason=None
+    )
+    chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk")
+    yield "{}".format(chunk.model_dump_json(exclude_unset=True))
+    choice_data = ChatCompletionResponseStreamChoice(
+        index=0,
+        delta=DeltaMessage(),
+        finish_reason="stop"
+    )
+    chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk")
+    yield "{}".format(chunk.model_dump_json(exclude_unset=True))
+    yield '[DONE]'
+def contains_custom_function(value: str) -> bool:
+    """
+    Determine whether 'function_call' according to a special function prefix.
+    For example, the functions defined in "tools_using_demo/tool_register.py" are all "get_xxx" and start with "get_"
+    [Note] This is not a rigorous judgment method, only for reference.
+    :param value:
+    :return:
+    """
+    return value and 'get_' in value
+def run(output_dir = "path/to/model_weight",  tuned_model_name="llama2_13b__mid_asciiaug1",CUDA_device='0',quat=True):
+    global LLM
+    LLM=NL2TL_translator(output_dir=output_dir,tuned_model_name= tuned_model_name,quat=quat)
+    tokenizer = LLM.tokenizer
+    model = LLM.model
+    # load Embedding
+    # embedding_model = SentenceTransformer(EMBEDDING_PATH, device="cuda")
+    uvicorn.run(app, host='0.0.0.0', port=8001, workers=1)
+if __name__ == "__main__":
+    # Load the LLM and start the server. The commented lines are per-machine variants
+    # kept by the author.
+    # on alinware mill19
+    # run()
+    # on icl-superman: use the module-level `output_dir` / `tuned_model_name` defaults
+    run(output_dir=output_dir,tuned_model_name=tuned_model_name)
+    # on zju server

{finetune → NL2HLTLTranslator}/mistral7b/finetune.py RENAMED Viewed

@@ -32,8 +32,8 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
 # dataset = load_dataset("samsum")
 device='cuda'
 np.random.seed(42)
-output_dir = "/home/user/xsj/model_weight/"
-datapath='/home/user/xsj/NL2TL-dataset/collect2'
 exp_name="_mid_ascii_0327_eos_2"
 explainer_files=['LTLexplain_0.json','LTLexplain_1.json','LTLexplain_2.json','LTLexplain_3.json']
 explainer_dic={}

 # dataset = load_dataset("samsum")
 device='cuda'
 np.random.seed(42)
+output_dir = os.path.join(os.path.dirname(__file__),'../')
+datapath=os.path.join(os.path.dirname(__file__),'../NL2TL-dataset/collect2')
 exp_name="_mid_ascii_0327_eos_2"
 explainer_files=['LTLexplain_0.json','LTLexplain_1.json','LTLexplain_2.json','LTLexplain_3.json']
 explainer_dic={}

{finetune → NL2HLTLTranslator}/mistral7b/prediction.py RENAMED Viewed

@@ -9,8 +9,8 @@ from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
 # from accelerate import infer_auto_device_map,init_empty_weights
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
-from NL2HLTLtaskPlanner.utils import Task2Preplacer
-from NL2HLTLtaskPlanner.utils import LTLChecker
 import re
 from datasets import concatenate_datasets
 import numpy as np
@@ -22,8 +22,8 @@ os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
 class Mistral_NL2TL_translator():
     def __init__(self,
-                 output_dir = "/home/user/xsj/model_weight",
-                 tuned_model_name="mistral7b_mid_ascii_0327_eos_2aug1_quat8",
                 #  CUDA_device='0',
                  quat=True,
                  replacer=Task2Preplacer) -> None:
@@ -237,7 +237,7 @@ if __name__=="__main__":
     # Metric
     metric = evaluate.load("rouge")
-    datapath='/home/user/xsj/NL2TL-dataset/collect2'
     tokenized_dataset = load_dataset("json",  data_files={"train":os.path.join(datapath,"ltl_eng_train_mid_ascii_gptAuged.jsonl"),"test":os.path.join(datapath,"ltl_eng_test_mid_ascii_gptAuged.jsonl")})
     print(tokenized_dataset)
     # run predictions
@@ -276,7 +276,7 @@ if __name__=="__main__":
     eval_output=np.array([input_sentence,predictions,references]).T
     import pandas as pd
     eval_output=pd.DataFrame(eval_output)
-    pd.DataFrame.to_csv(eval_output,"/home/user/xsj/model_weight/mistral7b_mid_ascii_0327_eos_2aug1_quat8"+'/output')
         # out llama
         # Rogue1: 98.363321%
         # rouge2: 95.987820%

 # from accelerate import infer_auto_device_map,init_empty_weights
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
+from NL2HLTLTranslator.utils.util import Task2Preplacer
+from NL2HLTLTranslator.utils.util import LTLChecker
 import re
 from datasets import concatenate_datasets
 import numpy as np
 class Mistral_NL2TL_translator():
     def __init__(self,
+                 output_dir = os.path.join(os.path.dirname(__file__),'../../'),
+                 tuned_model_name="mistral7b_quat8",
                 #  CUDA_device='0',
                  quat=True,
                  replacer=Task2Preplacer) -> None:
     # Metric
     metric = evaluate.load("rouge")
+    datapath='path/to/NL2TL-dataset/collect2'
     tokenized_dataset = load_dataset("json",  data_files={"train":os.path.join(datapath,"ltl_eng_train_mid_ascii_gptAuged.jsonl"),"test":os.path.join(datapath,"ltl_eng_test_mid_ascii_gptAuged.jsonl")})
     print(tokenized_dataset)
     # run predictions
     eval_output=np.array([input_sentence,predictions,references]).T
     import pandas as pd
     eval_output=pd.DataFrame(eval_output)
+    pd.DataFrame.to_csv(eval_output,"path/to/model_weight/mistral7b_mid_ascii_0327_eos_2aug1_quat8"+'/output')
         # out llama
         # Rogue1: 98.363321%
         # rouge2: 95.987820%

{finetune → NL2HLTLTranslator}/mistral7b/test.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/realtime_run.py RENAMED Viewed

File without changes

{finetune → NL2HLTLTranslator}/test.py RENAMED Viewed

File without changes

NL2HLTLTranslator/utils/util.py ADDED Viewed

	@@ -0,0 +1,449 @@

+import json
+import re
+import sys,os
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
+def splitJSONfromTXT(txt:str=""):
+    jsons=re.findall(r"```json([\s\S]*)```",txt)
+    if len(jsons)==0:
+        jsons=re.findall(r"```JSON([\s\S]*)```",txt)
+    if len(jsons)==0:
+        jsons+=re.findall(r"```([\s\S]*)```",txt)
+    if len(jsons)==0:
+        jsons+=re.findall(r"({[\s\S]*})",txt)
+    # print(re.findall(r"```json([\s\S]+?)```",txt))
+    print('find {} JSON para\n'.format(len(jsons)))
+    for i in range(len(jsons)):
+        # jsons[i]=jsons[i].replace(" ","")
+        # # 不能这样，会全部都没有空格
+        jsons[i]=jsons[i].replace("  ","")
+        jsons[i]=jsons[i].replace("\t","")
+        # jsons[i]=jsons[i].replace('_','')
+        # jsons[i]=jsons[i].replace('.','')
+        # 这是为了消除回车后的制表符
+        while(jsons[i][0]==" "):
+            jsons[i]=jsons[i][1:]
+        while(jsons[i][-1]==" "):
+            jsons[i]=jsons[i][:-1]
+        jsons[i]=jsons[i].replace("\n","")
+        left_brace=jsons[i].count('{')
+        right_brace=jsons[i].count('}')
+        if right_brace<left_brace:
+            for i in range(left_brace-right_brace):
+                jsons[i]+='}'
+        if right_brace>left_brace:
+            for i in range(right_brace-left_brace):
+                jsons[i]=jsons[i][1:]
+        # print('jsons[i][0]',jsons[i][0])
+        if jsons[i][0]!='{':
+            jsons[i]='{'+jsons[0]
+            jsons[i]+='}'
+        # print(jsons[i])
+        # id0=0
+        # for id0 in range(len(jsons[i])):
+        #     if jsons[i][id0]=='"':
+        #         print("**no more curly brace in return\n**")
+        #         break
+        #     elif jsons[i][id0]=='{':
+        #         jsons[i]=jsons[i][id0+1:]
+        #         id=len(jsons[i])
+        #         for id in range(len(jsons[i])-1,0,-1):
+        #             if jsons[i][id]=='}':
+        #                 break
+        #         jsons[i]=jsons[i][:id]
+        #         print("**delete extra curly brace in return\n**")
+        #         break
+        # id=len(jsons[i])
+        # for id in range(len(jsons[i])-1,0,-1):
+        #     if jsons[i][id]=='}':
+        #         jsons[i]=jsons[i][:id+1]
+        #         print("**delete extra right curly brace in return\n**")
+        #         break
+        # print("[]",j,"[]")
+    # print("splitJSONfromTXT",jsons)
+    # print(jsons[0])
+    # print(json.loads(jsons[0]))
+    return jsons
+    pass
+def readMultipleLinesTillException(prompt:str="",log=False,exp_PATH=""):
+    # the last line would not be read in
+    print("--"*15)
+    lines=[" "]
+    try:
+        lines.append(input(prompt))
+        while True:
+            lines.append(input())
+    except:
+        pass
+    ret ="".join(lines)
+    if log:
+        with open(exp_PATH+'log.txt','a') as f:    #设置文件对象
+            f.write("\nINPUT:\n")
+            f.write(prompt)                 #将字符串写入文件中
+            f.write("\nOUTPUT:\n")
+            f.write(ret)
+    return ret
+# class GPTinterface
+# def GPTinterface(prompts:list,log=False,exp_PATH=""):
+#     pass
+# # def
+# class GPTTranslater():
+#     def __init__(self) -> None:
+#         pass
+#     def translate(self,prompt:str="",log=False,exp_PATH=""):
+#         return GPTinterface(prompt=prompt,log=log,exp_PATH=exp_PATH)
+import re
+class Task2Preplacer():
+    def __init__(self,input:str=""):
+        # self.input=input
+        self.Task2PDict={}
+        self.P2TaskDict={}
+        self.count=0
+        pass
+    def mapping1(self,name):
+        name=name[0]
+        while not (name[-1]<='9' and name[-1]>='0'):
+            name=name[:-1]
+        if not (name in self.Task2PDict):
+            self.count+=1
+            self.Task2PDict[name]="P{:0>2d}".format(self.count)
+            self.P2TaskDict["P{:0>2d}".format(self.count)]=name
+        # print('141',self.Task2PDict,self.P2TaskDict)
+        return self.Task2PDict[name]
+        pass
+    def mapping2(self,name):
+        name=name[0]
+        return "{}".format(self.P2TaskDict[name])
+        pass
+    def reTask2P(self,Taskinput:str=""):
+        self.Task2PDict={}
+        self.P2TaskDict={}
+        self.count=0
+        self.Poutput=re.sub("Task[_0-9\.]+",self.mapping1,Taskinput)
+        print('self.Poutput',self.Poutput,'\n')
+        return self.Poutput
+    def reP2Task(self,Pinput:str=""):
+        print("self.P2TaskDict",self.P2TaskDict,'\n')
+        print("pinput",Pinput,'\n')
+        self.Taskoutput=re.sub("P[0-9]{2}",self.mapping2,Pinput)
+        return self.Taskoutput
+class Func2Preplacer():
+    def __init__(self,functionlist,input:str=""):
+        # self.input=input
+        self.functionlist=functionlist
+        self.Func2Tasklist=[]
+        pass
+    def reFunc2P(self,Taskinput:str=""):
+        def mapping(name):
+            name=name[0]
+            if not name in self.Task2Plist:
+                self.Task2Plist+=[name]
+            return "P{:0>2d}".format(self.Task2Plist.index(name))
+            pass
+        for i,func in enumerate(self.functionlist):
+            Taskinput=re.sub(func[0][:-2]+"\([\S]+]\)",mapping,Taskinput)
+            # Taskinput=re.sub(func[0][:-2]+"\([\s\S]+]\)",mapping,Taskinput)
+            # may be this is the proper one
+        self.Poutput=Taskinput
+        return self.Poutput
+    def reP2Func(self,Pinput:str=""):
+        def mapping(name):
+            name=name[0]
+            return "{}".format(self.Task2Plist[int(name[1:])])
+            pass
+        self.Taskoutput=re.sub("P[0-9]{2}",mapping,Pinput)
+        return self.Taskoutput
+class getFunc2HierarchicalAP():
+    def __init__(self,functionlist,input:str=""):
+        # self.input=input
+        self.functionlist=functionlist
+        self.HierarchicalEleList=dict()
+        # a dict to save the AP function and AP name
+        # a dict to save the CP Task and CP name
+        self.CP_count=0
+        self.CP_start=100
+        self.AP_count=0
+        self.AP_start=10
+    def AsciiLTL2FormalLTL(self,LTLInput:str,task:str,LTLtype:str="CP"):
+        # LTLtype=CP/AP means the element in the LTL
+        if LTLtype in ["CP","cp"]:
+            return self.reTask2HierarchicalLTL(LTLInput)
+            pass
+        elif LTLtype in ["ap","AP"]:
+            return self.reFunc2HierarchicalLTL(LTLInput)
+        elif LTLtype in ["sp","SP"]:
+            self.reTask2HierarchicalLTL(task)
+            return LTLInput
+            pass
+        pass
+    def mapAP2Function(self,AP:int):
+        # AP is the int number of the task
+        if AP in self.HierarchicalEleList:
+            return self.HierarchicalEleList[AP]
+        else:
+            return False
+    def mapTask2CP(self,TaskName:str):
+        return "p{:0>3d}".format(self.HierarchicalEleList[TaskName])
+    def mapping1(self,name):
+        name=name[0]
+        # print('name ',name, name in self.HierarchicalEleList)
+        print('mapping',name)
+        if not (name in self.HierarchicalEleList):
+            self.AP_count+=1
+            self.HierarchicalEleList[self.AP_count+self.AP_start]=name
+            self.HierarchicalEleList[name]=self.AP_count+self.AP_start
+        return "p{:0>2d}".format(self.HierarchicalEleList[name])
+        pass
+    def reFunc2HierarchicalLTL(self,Taskinput:str=""):
+        for i,func in enumerate(self.functionlist):
+            # print('taskinput ',Taskinput)
+            # Taskinput=re.sub(func[0][:-2]+"\([\S]+]\)",mapping,Taskinput)
+            Taskinput=re.sub(func[0][:-2]+"\([^\)]+\)",self.mapping1,Taskinput)
+        self.Poutput=Taskinput
+        return self.Poutput
+    # def reTask2Func(self,Pinput:str=""):
+    #     def mapping(name):
+    #         return "{}".format(self.Task2Plist[int(name[1:])])
+    #         pass
+    #     self.Taskoutput=re.sub("P[0-9]{2}",mapping,Pinput)
+    #     return self.Taskoutput
+    def mapping2(self,name):
+        name=name[0]
+        while not (name[-1]<='9' and name[-1]>='0'):
+            name=name[:-1]
+        print('mapping2',name)
+        if not name in self.HierarchicalEleList:
+            self.CP_count+=1
+            self.HierarchicalEleList[self.CP_count+self.CP_start]=name
+            self.HierarchicalEleList[name]=self.CP_count+self.CP_start
+        return "p{:0>3d}".format(self.HierarchicalEleList[name])
+        pass
+    def reTask2HierarchicalLTL(self,Taskinput:str=""):
+        self.Poutput=re.sub("Task_[0-9\.]+",self.mapping2,Taskinput)
+        return self.Poutput
+    # def reP2Task(self,Pinput:str=""):
+    #     print("self.Task2Plist",self.Task2Plist,'\n')
+    #     print("pinput",Pinput,'\n')
+    #     def mapping(name):
+    #         return "{}".format(self.Task2Plist[int(name[1:])])
+    #         pass
+    #     self.Taskoutput=re.sub("P[0-9]{2}",mapping,Pinput)
+    #     return self.Taskoutput
+# Task_1.1 and Task_1.2 can occur independently and either may be executed without affecting the other."
+class FuncParamExtractor():
+    def __init__(self,functionDefine) -> None:
+        self.functionlist=functionDefine
+        self.ParamPattern=re.compile("[\(,]([^,\)]+)")
+        pass
+    def extractParam(self,inputFunc:str=''):
+        return self.ParamPattern.findall(inputFunc)
+    def extractFunc(self,inputFunc:str=''):
+        for i,func in enumerate(self.functionlist):
+            Taskinput=re.search(func[0][:-2],inputFunc)
+            if Taskinput:
+                break
+            # Taskinput=re.sub(func[0][:-2]+"\([\S]+]\)",mapping,Taskinput)
+            # Taskinput=re.sub(func[0][:-2]+"\([\s\S]+]\)",mapping,Taskinput)
+            # may be this is the proper one
+        return Taskinput.group()
+class LTLChecker():
+    def __init__(self,APpattern=re.compile("(P[0-9]{2})")) -> None:
+        self.APpattern=APpattern
+    def AP_CorrCheck(self,natural:str="",ltl:str=""):
+        natural_AP=self.APpattern.findall(natural)
+        ltl_AP=self.APpattern.findall(ltl)
+        # natural_AP.sort()
+        # ltl_AP.sort()
+        return set(natural_AP)==set(ltl_AP)
+    def right_barkets_remover(self,ltl:str):
+        ltl=ltl.strip()
+        while ltl.count("(")<ltl.count(")"):
+            if ltl[-1]==')':
+                ltl=ltl[:-1].strip()
+            else:
+                break
+        return ltl
+    def brackets_Check(self,ltl:str):
+        return ltl.count("(")==ltl.count(")")
+def reAsciiLTL2EngLTL(AsciiInput:str=''):
+    # I > means ->
+    # E ^ means <>
+    # A means and
+    # N means ! ~
+    # O means |
+    AsciiInput=re.sub('A', 'And', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('O', 'Or', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('I', 'Imply', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('N', 'Not', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('E', 'Equally', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('F', 'Finally', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('G', 'Globally', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('U', 'Until', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('X', 'Next', AsciiInput, count=0, flags=0)
+    return AsciiInput
+def reEngLTL2FormalLTL(ENGInput:str=''):
+    if not isinstance(ENGInput,str):
+        return ENGInput
+    ENGInput=re.sub('And','&&', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Or','||', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Imply','->', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Not','!', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Equally','<=>', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Finally','<>', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Globally','[]', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Until','U', ENGInput, count=0, flags=0)
+    ENGInput=re.sub('Next','X', ENGInput, count=0, flags=0)
+    return ENGInput
+def reAsciiLTL2FormalLTL(AsciiInput:str=''):
+    AsciiInput=re.sub('A','&&', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('O','||', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('I','->', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('N','!', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('E','<=>', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('F','<>', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('G','[]', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('U','U', AsciiInput, count=0, flags=0)
+    AsciiInput=re.sub('X','X', AsciiInput, count=0, flags=0)
+    return AsciiInput
+if __name__=="__main__":
+    # Manual smoke test, run only when this file is executed directly. It prints
+    # the result of splitJSONfromTXT on two sample task-tree strings (the first
+    # wrapped in a ```json fence, the second bare) and of reAsciiLTL2EngLTL on
+    # one sample formula.
+    # print(getFunc2HierarchicalAP([
+    # ["Move_raw_ingredient_to_utensile()","name of ingredient", "name of utensil"],
+    # ["Move_utensil_to_certain_area()","name of utensil", "area"],
+    # ["Move_processed_ingredient_to_utensile()","name of ingredient", "name of utensil"],
+    # ["Processing_ingredient()","name of ingredient", "blue_knife/yellow_knife/hand"],
+    # ]).reFunc2HierarchicalLTL('Move_raw_ingredient_to_utensile(asd,asd)'))
+    # # print(FuncParamExtractor().extractFunc('Processing_ingredient(123,243,dfsa,3,)'))
+    # exit()
+    print(splitJSONfromTXT("""```json
+{
+   "Task_1":{
+      "task_id":"Task_1",
+      "task_instruction":"Prepare dishes by arranging fruit and vegetables and preparing eggs and meats.",
+      "task_relied_description":"We have several dishes to create, and there's a specific order for preparation.",
+      "sibling_nodes_condition":"",
+      "subtasks_of_this_node":["Task_1.1", "Task_1.2"]
+   },
+   "Task_1.1":{
+      "task_id":"Task_1.1",
+      "task_instruction":"Arrange fruit on the yellow plate then place vegetables on the blue plate.",
+      "task_relied_description":"Start by arranging the fruit on the yellow plate, followed by the vegetables on the blue plate.",
+      "sibling_nodes_condition":"",
+      "subtasks_of_this_node":["Task_1.1.1", "Task_1.1.2"]
+   },
+   "Task_1.1.1":{
+      "task_id":"Task_1.1.1",
+      "task_instruction":"Put half of the sliced tomato and the sliced watermelon on the yellow plate.",
+      "task_relied_description":"We have a tomato and a watermelon to serve as fruit.",
+      "sibling_nodes_condition":"",
+      "subtasks_of_this_node":["Task_1.1.1.1", "Task_1.1.1.2"]
+   },
+   "Task_1.1.1.1":{
+      "task_id":"Task_1.1.1.1",
+      "task_instruction":"Place tomato and watermelon on the yellow cutting board and slice each in half.",
+      "task_relied_description":"These can go on the yellow cutting board in any sequence. Slice each in half.",
+      "sibling_nodes_condition":"",
+      "subtasks_of_this_node":[]
+   },
+   "Task_1.1.1.2":{
+      "task_id":"Task_1.1.1.2",
+      "task_instruction":"Move one half of the tomato and the watermelon to the yellow plate.",
+      "task_relied_description":"then transfer one half of the tomato and the halved watermelon to the yellow plate.",
+      "sibling_nodes_condition":"After slicing the fruits",
+      "subtasks_of_this_node":[]
+   },
+   "Task_1.1.2":{
+      "task_id":"Task_1.1.2",
+      "task_instruction":"Place vegetables on the blue plate, including dividing and placing the broccoli and separating egg yolk.",
+      "task_relied_description":"For the vegetables, initially place the ingredients on the cutting board for slicing before transferring them to a plate.",
+      "sibling_nodes_condition":"After placing the fruits on the yellow plate",
+      "subtasks_of_this_node":["Task_1.1.2.1", "Task_1.1.2.2"]
+   },
+   "Task_1.1.2.1":{
+      "task_id":"Task_1.1.2.1",
+      "task_instruction":"Place broccoli on blue cutting board, divide it into two pieces, and then move both pieces to the blue plate.",
+      "task_relied_description":"For the broccoli, lay it on the blue cutting board and divide it into two pieces. Afterwards, move both pieces to the blue plate.",
+      "sibling_nodes_condition":"",
+      "subtasks_of_this_node":[]
+   },
+   "Task_1.1.2.2":{
+      "task_id":"Task_1.1.2.2",
+      "task_instruction":"Take an egg, remove its shell, separate the yolk from the egg and put it directly into the blue plate.",
+      "task_relied_description":"When it comes to the egg yolk, first take an egg but don't place it on the cutting board, Directly separate the yolk into the blue plate after removing the shell.",
+      "sibling_nodes_condition":"After placing the broccoli",
+      "subtasks_of_this_node":[]
+   },
+   "Task_1.2":{
+      "task_id":"Task_1.2",
+      "task_instruction":"Prepare the eggs and meats.",
+      "task_relied_description":"eggs and meats should be prepped last.",
+      "sibling_nodes_condition":"After arranging the fruits and vegetables",
+      "subtasks_of_this_node":[]
+   }
+}
+```"""))
+    print(reAsciiLTL2EngLTL("F ( P11 A ( F P04 ) )"))
+    print(splitJSONfromTXT("""
+     {
+   "Task_1": {
+      "task_id": "Task_1",
+      "task_instruction": "Prepare and cut banana, apple, onion and pepper. Follow rule: Do not cut fruits on a cutting board that has been used for cutting vegetables.",
+      "sibling_nodes_condition": "NA",
+      "subtasks_of_this_node":["Task_1.1", "Task_1.2", "Task_1.3"]
+   },
+   "Task_1.1": {
+      "task_id": "Task_1.1",
+      "task_instruction": "Prepare and cut banana and apple. Do not use a cutting board that has been used for cutting vegetables.",
+      "sibling_nodes_condition": "NA",
+      "subtasks_of_this_node":["Task_1.1.1", "Task_1.1.2"]
+   },
+   "Task_1.1.1": {
+      "task_id": "Task_1.1.1",
+      "task_instruction": "Prepare and cut banana",
+      "sibling_nodes_condition": "NA",
+      "subtasks_of_this_node": []
+   },
+   "Task_1.1.2": {
+      "task_id": "Task_1.1.2",
+      "task_instruction": "Prepare and cut apple",
+      "sibling_nodes_condition": "After preparing and cutting banana",
+      "subtasks_of_this_node": []
+   },
+   "Task_1.2": {
+      "task_id": "Task_1.2",
+      "task_instruction": "Prepare and cut onion and pepper",
+      "sibling_nodes_condition": "Do not use the same cutting board as used for fruits",
+      "subtasks_of_this_node": ["Task_1.2.1", "Task_1.2.2"]
+   },
+   "Task_1.2.1": {
+      "task_id": "Task_1.2.1",
+      "task_instruction": "Prepare and cut onion",
+      "sibling_nodes_condition": "NA",
+      "subtasks_of_this_node": []
+   },
+   "Task_1.2.2": {
+      "task_id": "Task_1.2.2",
+      "task_instruction": "Prepare and cut pepper",
+      "sibling_nodes_condition": "After preparing and cutting onion",
+      "subtasks_of_this_node": []
+   },
+   "Task_1.3": {
+      "task_id": "Task_1.3",
+      "task_instruction": "Rule: Do not cut fruits on a cutting board that has been used for cutting vegetables.",
+      "sibling_nodes_condition": "NA",
+      "subtasks_of_this_node": []
+   }}"""))

NL2TL-dataset/collect2/getUniqueLTL.py CHANGED Viewed

@@ -12,9 +12,9 @@ def findUniqueLTL(paths:list):
     return ret
 if __name__=='__main__':
-    path=['/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl','/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl']
     LTLs=findUniqueLTL(paths=path)
-    with open(os.path.join('/home/user/xsj/NL2TL-dataset/collect2','NLTLsummary.json'),'w') as f :
         f.write(json.dumps(LTLs,sort_keys=False,indent=4,separators=(',',':')))

     return ret
 if __name__=='__main__':
+    path=['path/to/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl','path/to/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl']
     LTLs=findUniqueLTL(paths=path)
+    with open(os.path.join('path/to/NL2TL-dataset/collect2','NLTLsummary.json'),'w') as f :
         f.write(json.dumps(LTLs,sort_keys=False,indent=4,separators=(',',':')))

README.md CHANGED Viewed

@@ -30,7 +30,19 @@ Based task related NL2TL datasets:
     - [Lang2LTL](https://github.com/h2r/Lang2LTL)
     - [nl2spec](https://github.com/realChrisHahn2/nl2spec)
     - [NL2TL](https://github.com/yongchao98/NL2TL)
 ## Cite
 ```bibtex
 @misc{xu2024scalingnaturallanguageunderstanding,

     - [Lang2LTL](https://github.com/h2r/Lang2LTL)
     - [nl2spec](https://github.com/realChrisHahn2/nl2spec)
     - [NL2TL](https://github.com/yongchao98/NL2TL)
+## File Structure
+  - NL2HLTL
+    - NL2HLTLTranslator
+      - fastapi_server.py: FastAPI server for testing the translator; runs on localhost:8001
+      - mistral7b
+        - finetune.py: fine-tuning code
+        - prediction.py: prediction code (this version does not use sockets)
+    - mistral7b_quat8: fine-tuned model based on Mistral-7B, quantized to 8 bits
+    - NL2TL-dataset: the dataset used
+## Run
+```bash
+python NL2HLTLTranslator/fastapi_server.py
+```
 ## Cite
 ```bibtex
 @misc{xu2024scalingnaturallanguageunderstanding,

finetune/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json DELETED Viewed

@@ -1,69 +0,0 @@
-{
-  "builder_name": "json",
-  "citation": "",
-  "config_name": "default",
-  "dataset_name": "json",
-  "dataset_size": 889411,
-  "description": "",
-  "download_checksums": {
-    "LTL_datasets/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl": {
-      "num_bytes": 1129386,
-      "checksum": null
-    },
-    "LTL_datasets/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl": {
-      "num_bytes": 125920,
-      "checksum": null
-    }
-  },
-  "download_size": 1255306,
-  "features": {
-    "id": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "input_ids": {
-      "feature": {
-        "dtype": "int32",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "attention_mask": {
-      "feature": {
-        "dtype": "int8",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "labels": {
-      "feature": {
-        "dtype": "int64",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "",
-  "license": "",
-  "size_in_bytes": 2144717,
-  "splits": {
-    "train": {
-      "name": "train",
-      "num_bytes": 800102,
-      "num_examples": 10621,
-      "dataset_name": "json"
-    },
-    "test": {
-      "name": "test",
-      "num_bytes": 89309,
-      "num_examples": 1181,
-      "dataset_name": "json"
-    }
-  },
-  "version": {
-    "version_str": "0.0.0",
-    "major": 0,
-    "minor": 0,
-    "patch": 0
-  }
-}

finetune/Llama2_13b/data/eval/tf-ltl_eng_test_mid_ascii_gptAuged/state.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "c6bf809a7a8f99a6",
-  "_format_columns": null,
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": "test"
-}

finetune/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/dataset_info.json DELETED Viewed

@@ -1,69 +0,0 @@
-{
-  "builder_name": "json",
-  "citation": "",
-  "config_name": "default",
-  "dataset_name": "json",
-  "dataset_size": 889411,
-  "description": "",
-  "download_checksums": {
-    "LTL_datasets/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl": {
-      "num_bytes": 1129386,
-      "checksum": null
-    },
-    "LTL_datasets/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl": {
-      "num_bytes": 125920,
-      "checksum": null
-    }
-  },
-  "download_size": 1255306,
-  "features": {
-    "id": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "input_ids": {
-      "feature": {
-        "dtype": "int32",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "attention_mask": {
-      "feature": {
-        "dtype": "int8",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "labels": {
-      "feature": {
-        "dtype": "int64",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "",
-  "license": "",
-  "size_in_bytes": 2144717,
-  "splits": {
-    "train": {
-      "name": "train",
-      "num_bytes": 800102,
-      "num_examples": 10621,
-      "dataset_name": "json"
-    },
-    "test": {
-      "name": "test",
-      "num_bytes": 89309,
-      "num_examples": 1181,
-      "dataset_name": "json"
-    }
-  },
-  "version": {
-    "version_str": "0.0.0",
-    "major": 0,
-    "minor": 0,
-    "patch": 0
-  }
-}

finetune/Llama2_13b/data/train/tf-ltl_eng_test_mid_ascii_gptAuged/state.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "afb9c85014ff4b4e",
-  "_format_columns": null,
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": "train"
-}

setup.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# Packaging script for the NL2HLTLTranslator distribution.
+# find_packages() collects the packages found next to this file, and
+# install_requires is empty, so no runtime dependencies are declared here.
+from setuptools import setup, find_packages
+setup(
+    name='NL2HLTLTranslator',
+    version='0.2',
+    author='xsj',
+    author_email='[email protected]',
+    description='the package is used for multi robot task execution in the aithor env, under the instruction structure of LTL',
+    packages=find_packages(),
+    install_requires=[],
+    license='MIT',
+    url='https://github.com/darrrt/NL2HLTL',
+)