Spaces:

LINC-BIT
/

EdgeTA

Running

App Files Files Community

EdgeTA / methods /elasticdnn /pipeline /offline /fm_to_md /base.py

LINC-BIT

Upload 1912 files

b84549f verified over 1 year ago

raw

history blame contribute delete

2.31 kB

	import torch
	from torch import nn
	from abc import ABC, abstractmethod

	from utils.dl.common.model import get_model_device, get_model_latency, get_model_size
	from utils.common.log import logger


	class FM_to_MD_Util(ABC):
	"""
	Foundation Model (FM) to Master DNN (MD), where MD is a narrower FM (with smaller width but the same depth).

	MD is pre-trained by knowledge distillation;
	Moreover, we construct the index relationship between FM and MD in this process,
	enabling the lightweight knowledge feedback from MD to FM.

	NOTE: 索引建立在master DNN权重通道和LoRA的AB之间
	"""

	@abstractmethod
	def init_md_from_fm_by_reducing_width(self, fm: nn.Module, reducing_width_ratio: int) -> nn.Module:
	raise NotImplementedError

	def init_md_from_fm_by_reducing_width_with_perf_test(self, fm: nn.Module, reducing_width_ratio: int,
	samples: torch.Tensor) -> nn.Module:
	fm_size = get_model_size(fm, True)
	fm_latency = get_model_latency(fm, (1, *list(samples.size())[1:]), 20,
	get_model_device(fm), 20, False)

	master_dnn = self.init_md_from_fm_by_reducing_width(fm, reducing_width_ratio)
	master_dnn_size = get_model_size(master_dnn, True)
	logger.debug(f'inited master DNN: {master_dnn}')
	# from utils.dl.common.model import get_module
	# print('after generating')
	# get_module(fm, 'head').debug()
	# get_module(master_dnn, 'head').debug()
	# print('test master latency')
	master_dnn_latency = get_model_latency(master_dnn, (1, *list(samples.size())[1:]), 20,
	get_model_device(master_dnn), 20, False)

	logger.info(f'init master DNN (w/o FBS yet) by reducing foundation model\'s width (by {reducing_width_ratio:d}x)')
	logger.info(f'foundation model ({fm_size:.3f}MB, {fm_latency:.4f}s/sample) -> '
	f'master DNN ({master_dnn_size:.3f}MB, {master_dnn_latency:.4f}s/sample)\n'
	f'(model size: ↓ {(fm_size / master_dnn_size):.2f}x, '
	f'latency: ↓ {(fm_latency / master_dnn_latency):.2f}x)')

	return master_dnn