"""Generate json file for webpage.""" import json import os import re # models = ['llama', 'alpaca', 'gpt35', 'bard'] models = ["vicuna"] def read_jsonl(path: str, key: str = None): data = [] with open(os.path.expanduser(path)) as f: for line in f: if not line: continue data.append(json.loads(line)) if key is not None: data.sort(key=lambda x: x[key]) data = {item[key]: item for item in data} return data def trim_hanging_lines(s: str, n: int) -> str: s = s.strip() for _ in range(n): s = s.split("\n", 1)[1].strip() return s if __name__ == "__main__": questions = read_jsonl("table/question.jsonl", key="question_id") # alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id') # bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id') # gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id') # llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id') vicuna_answers = read_jsonl( "table/answer/answer_vicuna-13b.jsonl", key="question_id" ) ours_answers = read_jsonl( "table/results/llama-13b-hf-alpaca.jsonl", key="question_id" ) review_vicuna = read_jsonl( "table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl", key="question_id" ) # review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id') # review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id') # review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id') # review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id') records = [] for qid in questions.keys(): r = { "id": qid, "category": questions[qid]["category"], "question": questions[qid]["text"], "answers": { # 'alpaca': alpaca_answers[qid]['text'], # 'llama': llama_answers[qid]['text'], # 'bard': bard_answers[qid]['text'], # 'gpt35': gpt35_answers[qid]['text'], "vicuna": vicuna_answers[qid]["text"], "ours": ours_answers[qid]["text"], }, "evaluations": { # 'alpaca': review_alpaca[qid]['text'], # 'llama': review_llama[qid]['text'], # 'bard': review_bard[qid]['text'], "vicuna": review_vicuna[qid]["content"], # 'gpt35': review_gpt35[qid]['text'], }, "scores": { "vicuna": review_vicuna[qid]["tuple"], # 'alpaca': review_alpaca[qid]['score'], # 'llama': review_llama[qid]['score'], # 'bard': review_bard[qid]['score'], # 'gpt35': review_gpt35[qid]['score'], }, } # cleanup data cleaned_evals = {} for k, v in r["evaluations"].items(): v = v.strip() lines = v.split("\n") # trim the first line if it's a pair of numbers if re.match(r"\d+[, ]+\d+", lines[0]): lines = lines[1:] v = "\n".join(lines) cleaned_evals[k] = v.replace("Assistant 1", "**Assistant 1**").replace( "Assistant 2", "**Assistant 2**" ) r["evaluations"] = cleaned_evals records.append(r) # Reorder the records, this is optional for r in records: if r["id"] <= 20: r["id"] += 60 else: r["id"] -= 20 for r in records: if r["id"] <= 50: r["id"] += 10 elif 50 < r["id"] <= 60: r["id"] -= 50 for r in records: if r["id"] == 7: r["id"] = 1 elif r["id"] < 7: r["id"] += 1 records.sort(key=lambda x: x["id"]) # Write to file with open("webpage/data.json", "w") as f: json.dump({"questions": records, "models": models}, f, indent=2)