Spaces:

hasaniqbal777
/

OpenFactCheck-Prerelease

Sleeping

OpenFactCheck-Prerelease / src /openfactcheck /solvers /factcheckgpt /factcheckgpt_utils /data_util.py

Hasan Iqbal

Added OpenFactCheck library

8360ec7 unverified 8 months ago

3.61 kB

	import csv
	import json
	import numpy as np
	from collections import Counter
	from typing import Dict, List, Any


	def save_to_file(text, filename='error_output.txt'):
	"""Save a string to a file line by line."""
	with open(filename, 'a', encoding='utf-8') as file:
	file.write(text + '\n')


	def majority_vote(input_list):
	# Use Counter to count occurrences of each element
	counter = Counter(input_list)

	# Find the element with the maximum count (majority)
	majority_element = max(counter, key=counter.get)

	# Return the majority element
	return majority_element


	def is_float(string):
	if string.replace(".", "").isnumeric():
	return True
	else:
	return False


	def save_json(dictionary: Dict[str, Any], save_dir: str) -> None:
	# Serializing json
	json_object = json.dumps(dictionary, indent=4, ensure_ascii=False)

	# Writing to sample.json
	with open(save_dir, "w", encoding='utf-8') as outfile:
	outfile.write(json_object)


	def read_json(filepath: str) -> Dict[str, Any]:
	data = {}
	with open(filepath, 'r', encoding='utf-8') as file:
	data = json.load(file)
	return data


	def list_to_dict(data: List[Dict[str, Any]]) -> Dict[int, Any]:
	temp = {}
	for i, d in enumerate(data):
	temp[i] = d
	return temp


	def load_jsonl(path):
	data = []
	with open(path, 'r', encoding='utf-8') as reader:
	for line in reader:
	data.append(json.loads(line))
	return data


	# def load_jsonl(input_path) -> list:
	# """
	# Read list of objects from a JSON lines file.
	# """
	# data = []
	# with open(input_path, 'r', encoding='utf-8') as f:
	# for line in f:
	# data.append(json.loads(line.rstrip('\n\|\r')))
	# print('Loaded {} records from {}'.format(len(data), input_path))
	# return data

	def dump_jsonl(data, output_path, append=False):
	"""
	Write list of objects to a JSON lines file.
	"""
	mode = 'a+' if append else 'w'
	with open(output_path, mode, encoding='utf-8') as f:
	for line in data:
	json_record = json.dumps(line, ensure_ascii=False)
	f.write(json_record + '\n')
	print('Wrote {} records to {}'.format(len(data), output_path))


	def cosine(u, v):
	"""based on embeddings and calculate cosine similarity"""
	return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))


	def read_csv(input_file, quotechar=None):
	with open(input_file, "r", encoding="utf-8") as f:
	reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
	lines = []
	for line in reader:
	lines.append(line)
	return lines


	def save_csv(header, data, output_file):
	with open(output_file, 'w', encoding='UTF8', newline='') as f:
	writer = csv.writer(f, delimiter='\t')
	# write the header
	writer.writerow(header)
	# write multiple rows
	writer.writerows(data)


	def save_array(filename, embeddings):
	# save embeddings into file
	with open(filename, 'wb') as f:
	np.save(f, embeddings)


	def load_array(filename):
	with open(filename, 'rb') as f:
	a = np.load(f)
	return a


	def read_txt(input_file):
	with open(input_file, "r", encoding="utf-8") as f:
	return f.readlines()


	def save_txt(data, output_file):
	with open(output_file, "w", encoding="utf-8") as writer:
	writer.write("\n".join(data))


	def clean_text(text):
	for mark in ['"', '-', '\t', ' ']:
	for i in [5, 4, 3, 2]:
	marks = mark * i
	text = text.replace(marks, '')
	return text