import keras
import keras_hub

model_presets = [
    # Larger instruction-tuned presets (7B-9B parameters).
    "hf://google/gemma-2-instruct-9b-keras",
    "hf://meta-llama/Llama-3.1-8B-Instruct",
    "hf://google/codegemma-7b-it-keras",
    "hf://keras/mistral_instruct_7b_en",
    "hf://keras/vicuna_1.5_7b_en",
    # Smaller presets (1B-3B parameters).
    "hf://meta-llama/Llama-3.2-1B-Instruct",
    "hf://google/gemma-2b-it-keras",
    "hf://meta-llama/Llama-3.2-3B-Instruct",
]

# Human-readable labels: strip the "hf://" scheme and the owner prefix.
model_labels = [
    preset.removeprefix("hf://")
    .removeprefix("google/")
    .removeprefix("keras/")
    .removeprefix("meta-llama/")
    for preset in model_presets
]


def preset_to_website_url(preset):
    preset = preset.removeprefix("hf://")
    url = "https://huggingface.co/" + preset
    return url


def get_appropriate_chat_template(preset):
    # Vicuna presets get an explicit chat template name; every other preset
    # uses "auto".
    return "Vicuna" if "vicuna" in preset else "auto"


def get_default_layout_map(preset_name, device_mesh):
    # The Llama3 layout map is reused for the Mistral and Vicuna presets
    # as well.
    if (
        "Llama" in preset_name
        or "mistral" in preset_name
        or "vicuna" in preset_name
    ):
        layout_map = keras_hub.models.Llama3Backbone.get_layout_map(device_mesh)
        # Shard the reverse embeddings (the output projection) as well.
        layout_map["token_embedding/reverse_embeddings"] = ("batch", "model")
        return layout_map

    elif "gemma" in preset_name:
        layout_map = keras_hub.models.GemmaBackbone.get_layout_map(device_mesh)
        if "gemma-2b-" in preset_name:
            # The 2B Gemma preset uses a different sharding for its attention
            # query/key/value kernels: remove the default entry and re-insert
            # it with the adjusted layout.
            patch_key = "decoder_block.*attention.*(query|key|value).kernel"
            layout_map.pop(patch_key)
            layout_map[patch_key] = (None, "model", "batch")
        return layout_map
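

# For reference, a minimal sketch of how the returned layout map is consumed
# (this mirrors load_model() below; the standalone snippet is illustrative,
# not part of the original script, and assumes a multi-accelerator backend):
#
#   devices = keras.distribution.list_devices()
#   mesh = keras.distribution.DeviceMesh(
#       shape=(1, len(devices)), axis_names=["batch", "model"], devices=devices
#   )
#   layout_map = get_default_layout_map("hf://google/gemma-2b-it-keras", mesh)
#   distribution = keras.distribution.ModelParallel(
#       layout_map=layout_map, batch_dim_name="batch"
#   )
#   keras.distribution.set_distribution(distribution)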


def log_applied_layout_map(model):
    print("Model class:", type(model).__name__)
    # Pick a representative transformer block for each supported architecture.
    if "Gemma" in type(model).__name__:
        transformer_decoder_block_name = "decoder_block_1"
    elif "Llama" in type(model).__name__:
        transformer_decoder_block_name = "transformer_layer_1"
    elif "Mistral" in type(model).__name__:
        transformer_decoder_block_name = "transformer_layer_1"
    else:
        print("Unknown architecture. Cannot display the applied layout.")
        return

    # Print path, shape, sharding spec, and dtype for every weight in the
    # embedding layer and in one decoder block.
    embedding_layer = model.backbone.get_layer("token_embedding")
    print(embedding_layer)
    decoder_block = model.backbone.get_layer(transformer_decoder_block_name)
    print(type(decoder_block))
    for variable in embedding_layer.weights + decoder_block.weights:
        print(
            f"{variable.path:<58}  "
            f"{str(variable.shape):<16}  "
            f"{str(variable.value.sharding.spec):<35}  "
            f"{str(variable.dtype)}"
        )


def load_model(preset):
    # Build a 1 x N device mesh: no data parallelism, model weights sharded
    # across all available accelerators.
    devices = keras.distribution.list_devices()
    device_mesh = keras.distribution.DeviceMesh(
        shape=(1, len(devices)), axis_names=["batch", "model"], devices=devices
    )
    model_parallel = keras.distribution.ModelParallel(
        layout_map=get_default_layout_map(preset, device_mesh),
        batch_dim_name="batch",
    )

    # Any model created inside this scope is sharded according to the
    # layout map above.
    with model_parallel.scope():
        # These two presets are assembled explicitly from their backbone and
        # preprocessor classes; the remaining presets load through the
        # high-level CausalLM API.
        if "google/gemma-2-instruct-9b-keras" in preset:
            model = keras_hub.models.GemmaCausalLM(
                backbone=keras_hub.models.GemmaBackbone.from_preset(
                    preset, dtype="bfloat16"
                ),
                preprocessor=keras_hub.models.GemmaCausalLMPreprocessor.from_preset(
                    preset
                ),
            )
        elif "meta-llama/Llama-3.1-8B-Instruct" in preset:
            model = keras_hub.models.Llama3CausalLM(
                backbone=keras_hub.models.Llama3Backbone.from_preset(
                    preset, dtype="bfloat16"
                ),
                preprocessor=keras_hub.models.Llama3CausalLMPreprocessor.from_preset(
                    preset
                ),
            )
        else:
            model = keras_hub.models.CausalLM.from_preset(
                preset, dtype="bfloat16"
            )

    log_applied_layout_map(model)
    return model
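

# Usage sketch (illustrative, not part of the original script): assumes a
# multi-accelerator JAX runtime; the preset choice, prompt, and max_length
# below are placeholders.
#
#   model = load_model("hf://keras/mistral_instruct_7b_en")
#   print(model.generate("What is Keras?", max_length=64))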