Spaces:
Runtime error
Runtime error
| import json | |
def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is opened as UTF-8 text and parsed with ``json.load``; the
    parsed Python object is returned unchanged.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)
def write_json_file(file_path, data):
    """Serialize *data* as JSON and write it to *file_path*.

    The file is opened for writing as UTF-8 text (replacing any existing
    content). Non-ASCII characters are written as-is rather than escaped,
    and the output is pretty-printed with a two-space indent.
    """
    with open(file_path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
def _join_field(value):
    """Return *value* as one string, joining a sequence of strings with spaces.

    A value that is already a string is returned unchanged; joining it
    would insert a space between every character.
    """
    if isinstance(value, str):
        return value
    return " ".join(value)


def convert_jsonl(input_path, output_path):
    """Rewrite a JSONL file so each record's "input"/"output" is a single string.

    Each non-blank line of *input_path* is parsed as one JSON object. Its
    "input" and "output" values are collapsed to space-joined strings and
    the record is written to *output_path* as one JSON line, with non-ASCII
    characters left unescaped.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks the "input" or "output" key.
    """
    with open(input_path, "r", encoding="utf-8") as source:
        with open(output_path, "w", encoding="utf-8") as target:
            for line in source:
                # Blank lines (e.g. a trailing newline at end of file) are
                # not records; json.loads would raise on them.
                if not line.strip():
                    continue
                record = json.loads(line)
                record["input"] = _join_field(record["input"])
                record["output"] = _join_field(record["output"])
                target.write(json.dumps(record, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    # Source and destination JSONL files for the conversion.
    input_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50k.jsonl"
    output_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50knew.jsonl"
    convert_jsonl(input_file_path, output_file_path)
| # # Read the original JSON file | |
| # with open(input_file_path, 'r', encoding='utf-8') as json_file: | |
| # data = json.load(json_file) | |
| # # Write the data to a JSONL file | |
| # with open(output_file_path, 'w', encoding='utf-8') as jsonl_file: | |
| # for item in data: | |
| # jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n') | |
| # # Read the JSON file | |
| # original_data = read_json_file(input_file_path) | |
| # # # Convert the data, removing the redundant [] wrappers | |
| # # corrected_data = [dialog[0] for dialog in original_data] | |
| # processed_data = [] | |
| # for item in original_data: | |
| # processed_item = { | |
| # "input": item["instruction"], | |
| # "output": item["output"] | |
| # } | |
| # processed_data.append(processed_item) | |
| # # Save the converted data to a new JSON file | |
| # write_json_file(output_file_path, processed_data) | |
| # # print("数据转换完成,并保存到corrected_data.json文件中。") | |