# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import os
import torch
import random
import numpy as np
from transformers import set_seed
import deepspeed
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus

def print_rank_0(msg, rank=0):
    # Only the designated (or default) rank prints, to avoid duplicated
    # log lines when running with multiple processes.
    if rank <= 0:
        print(msg)

def to_device(batch, device):
    # Move every tensor in the batch dict to `device`; values without a
    # `.to()` method (e.g. lists of strings) are passed through unchanged.
    output = {}
    for k, v in batch.items():
        try:
            output[k] = v.to(device)
        except AttributeError:
            output[k] = v
    return output
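
# Illustrative usage sketch (an assumption, not part of the original file):
# moving a tokenized batch onto the current CUDA device. `tokenizer` and the
# device index are placeholders for the example.
#
#   batch = tokenizer(["hello world"], return_tensors="pt")
#   batch = to_device(batch, torch.device("cuda:0"))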

class MovingAverage:
    """Running (cumulative) mean of all values passed to `update`."""

    def __init__(self):
        self.count = 0
        self.total = 0
        self.mean = 0

    def update(self, num):
        self.total += num
        self.count += 1
        self.mean = self.total / self.count
        return self.mean
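
# Illustrative usage sketch (an assumption, not part of the original file):
# smoothing a noisy per-step loss.
#
#   losses = MovingAverage()
#   for step_loss in (0.9, 0.7, 0.5):
#       smoothed = losses.update(step_loss)  # running mean so far
#   # losses.mean == 0.7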

def save_hf_format(model, tokenizer, args, sub_folder=""):
    # Save the model in Hugging Face format so it can be reloaded later
    # via `from_pretrained`. LoRA-specific keys are dropped from the
    # state dict before writing.
    model_to_save = model.module if hasattr(model, 'module') else model
    CONFIG_NAME = "config.json"
    WEIGHTS_NAME = "pytorch_model.bin"
    output_dir = os.path.join(args.output_dir, sub_folder)
    os.makedirs(output_dir, exist_ok=True)
    output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
    output_config_file = os.path.join(output_dir, CONFIG_NAME)
    save_dict = model_to_save.state_dict()
    for key in list(save_dict.keys()):
        if "lora" in key:
            del save_dict[key]
    torch.save(save_dict, output_model_file)
    model_to_save.config.to_json_file(output_config_file)
    tokenizer.save_vocabulary(output_dir)
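
# Illustrative round trip (an assumption, not part of the original file):
# a checkpoint written by save_hf_format should load with the standard
# Hugging Face loaders. Note that only the vocabulary files are written
# here, so the tokenizer class may need to be named explicitly on reload.
#
#   from transformers import AutoModelForCausalLM
#   model = AutoModelForCausalLM.from_pretrained(args.output_dir)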

def set_random_seed(seed):
    # Seed every RNG in play: transformers, Python's random, NumPy, and
    # PyTorch (CPU plus all CUDA devices).
    if seed is not None:
        set_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

def get_all_reduce_mean(tensor):
    # All-reduce (sum) across ranks, then divide by the world size to get
    # the global mean. Requires an initialized process group.
    torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
    tensor = tensor / torch.distributed.get_world_size()
    return tensor
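
# Illustrative usage sketch (an assumption, not part of the original file):
# averaging a per-rank scalar loss across all ranks, after
# torch.distributed.init_process_group has been called.
#
#   loss = loss.detach()
#   mean_loss = get_all_reduce_mean(loss).item()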

def get_optimizer_grouped_parameters(
        model,
        weight_decay,
        no_decay_name_list=["bias", "LayerNorm.weight"]):
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay_name_list)
                and p.requires_grad
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay_name_list)
                and p.requires_grad
            ],
            "weight_decay": 0.0,
        },
    ]
    return optimizer_grouped_parameters
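
# Illustrative wiring (an assumption, not part of the original file): the
# two parameter groups plug directly into a torch optimizer, with decay
# applied only to the first group.
#
#   groups = get_optimizer_grouped_parameters(model, weight_decay=0.1)
#   optimizer = torch.optim.AdamW(groups, lr=1e-5)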

def _z3_params_to_fetch(param_list):
    # Under ZeRO stage 3, parameters are partitioned across ranks; return
    # the ones not materialized locally, which must be gathered before use.
    return [
        p for p in param_list
        if hasattr(p, 'ds_id') and p.ds_status == ZeroParamStatus.NOT_AVAILABLE
    ]

def moving_average(model, model_ema, beta=0.992, device=None, zero_stage=0):
    # Update `model_ema` as an exponential moving average of `model`:
    # ema = (1 - beta) * param + beta * ema. Under ZeRO stage 3 the
    # partitioned parameters are gathered before being read.
    zero_stage_3 = (zero_stage == 3)
    with torch.no_grad():
        for param, param_ema in zip(model.parameters(),
                                    model_ema.parameters()):
            # TODO: use prefiltering for efficiency
            params_to_fetch = _z3_params_to_fetch(
                [param, param_ema]) if zero_stage_3 else []
            should_gather_param = len(params_to_fetch) > 0
            with deepspeed.zero.GatheredParameters(
                    params_to_fetch, enabled=should_gather_param):
                data = param.data
                if device is not None:
                    data = data.to(device)
                param_ema.data.copy_(torch.lerp(data, param_ema.data, beta))
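
# Illustrative usage sketch (an assumption, not part of the original file):
# called once per training step to maintain an EMA copy of the model.
# `actor_model`, `actor_model_ema`, and `args` are placeholders.
#
#   moving_average(actor_model, actor_model_ema,
#                  zero_stage=args.actor_zero_stage)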

def save_zero_three_model(model_ema, global_rank, save_dir, zero_stage=0):
    # Save a full (unpartitioned) checkpoint. For ZeRO stages 0-2, rank 0
    # already holds the complete state dict; for stage 3, each parameter
    # is gathered from its owning ranks and copied to CPU first.
    zero_stage_3 = (zero_stage == 3)
    os.makedirs(save_dir, exist_ok=True)
    WEIGHTS_NAME = "pytorch_model.bin"
    output_model_file = os.path.join(save_dir, WEIGHTS_NAME)
    model_to_save = model_ema.module if hasattr(model_ema,
                                                'module') else model_ema
    if not zero_stage_3:
        if global_rank == 0:
            torch.save(model_to_save.state_dict(), output_model_file)
    else:
        output_state_dict = {}
        for k, v in model_to_save.named_parameters():
            if hasattr(v, 'ds_id'):
                with deepspeed.zero.GatheredParameters(_z3_params_to_fetch(
                        [v]), enabled=zero_stage_3):
                    v_p = v.data.cpu()
            else:
                v_p = v.cpu()
            # Only rank 0 accumulates the checkpoint; LoRA keys are skipped.
            if global_rank == 0 and "lora" not in k:
                output_state_dict[k] = v_p
        if global_rank == 0:
            torch.save(output_state_dict, output_model_file)
        del output_state_dict
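

if __name__ == "__main__":
    # Minimal smoke test of the single-process helpers; a sketch added for
    # illustration, not part of the original file. The distributed and
    # ZeRO-3 helpers above require an initialized process group and are
    # not exercised here.
    set_random_seed(1234)
    avg = MovingAverage()
    for x in (1.0, 2.0, 3.0):
        avg.update(x)
    print_rank_0(f"running mean: {avg.mean}")  # 2.0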