tablegpt
/

TableGPT2-7B

Model card Files Files and versions Community

TableGPT2-7B / evaluation /general_benchmarks /HumanEval /human_eval /data.py

darklight03's picture

eval (#6)

7f272e4 verified 8 months ago

1.55 kB

	import gzip
	import json
	import os
	from typing import Dict, Iterable

	# Absolute path of the directory that contains this module.
	ROOT = os.path.dirname(os.path.abspath(__file__))
	# Default evaluation set: the gzipped JSONL file under "data", one level
	# above this module's directory.
	HUMAN_EVAL = os.path.join(
	    ROOT,
	    "..",
	    "data",
	    "HumanEval.jsonl.gz",
	)


	def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]:
	    """
	    Loads every task from a jsonl evaluation file into a dictionary
	    keyed by each task's "task_id" value.

	    If the same "task_id" appears more than once, the last occurrence wins.
	    """
	    problems = {}
	    for task in stream_jsonl(evalset_file):
	        problems[task["task_id"]] = task
	    return problems


	def stream_jsonl(filename: str) -> Iterable[Dict]:
	    """
	    Parses each jsonl line and yields it as a dictionary

	    Files whose name ends in ".gz" are decompressed on the fly. Plain and
	    gzipped input are both decoded as UTF-8, so the result does not depend
	    on the platform's locale encoding. Lines that contain only whitespace
	    are skipped.

	    Args:
	        filename: Path to a jsonl file, optionally gzip-compressed. A leading
	            "~" is expanded, mirroring what write_jsonl does on output.

	    Yields:
	        The object decoded by json.loads for each non-blank line.

	    Raises:
	        OSError: if the file cannot be opened or read.
	        json.JSONDecodeError: if a non-blank line is not valid JSON.
	    """
	    filename = os.path.expanduser(filename)
	    # gzip.open and the builtin open accept the same text-mode arguments,
	    # so one read loop serves both the compressed and the plain case.
	    opener = gzip.open if filename.endswith(".gz") else open
	    with opener(filename, "rt", encoding="utf-8") as fp:
	        for line in fp:
	            # strip() leaves an empty (falsy) string for whitespace-only lines.
	            if line.strip():
	                yield json.loads(line)


	def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
	    """
	    Serializes every dictionary in ``data`` as one JSON document per line.

	    The path is user-expanded first. A name ending in ".gz" gets its content
	    gzip-compressed; any other name is written uncompressed. With ``append``
	    set the file is opened for appending instead of being truncated.
	    """

	    def _dump(sink):
	        # Each record becomes a UTF-8 encoded JSON line terminated by "\n".
	        for record in data:
	            sink.write((json.dumps(record) + "\n").encode("utf-8"))

	    mode = "ab" if append else "wb"
	    filename = os.path.expanduser(filename)
	    with open(filename, mode) as raw:
	        if not filename.endswith(".gz"):
	            _dump(raw)
	            return
	        # The outer handle carries the append/truncate mode; the gzip layer
	        # itself is always opened "wb" on top of that handle.
	        with gzip.GzipFile(fileobj=raw, mode="wb") as packed:
	            _dump(packed)