from datasets import load_dataset
# Specify the name of the dataset
dataset_name = "yahma/alpaca-cleaned"
# Load the dataset from the specified name and select the "train" split
dataset = load_dataset(dataset_name, split="train")
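# Optional sanity check (illustrative, not required for training): confirm the Alpaca-style
# columns used below ("instruction", "input", "output") are present and peek at one row.
print(dataset.column_names)
print(dataset[0])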
# We will load the Falcon 7B model, apply 4-bit quantization to it, and then add LoRA adapters to the model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Defining the name of the Falcon model
model_name = "ybelkada/falcon-7b-sharded-bf16"
# Configuring the BitsAndBytes quantization
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.float16,
)
# Loading the Falcon model with quantization configuration
model = AutoModelForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
trust_remote_code=True
)
# Disabling cache usage in the model configuration
model.config.use_cache = False
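# Optional: report the quantized model's memory footprint. With 4-bit NF4 weights this should be
# a few GB rather than the ~14 GB a full fp16 copy of Falcon-7B would need (rough estimate).
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")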
# Load the tokenizer for the Falcon 7B model with remote code trust
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Set the padding token to be the same as the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token
# Import the necessary module for LoRA configuration
from peft import LoraConfig
# Define the parameters for LoRA configuration
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
# Create the LoRA configuration object
peft_config = LoraConfig(
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
r=lora_r,
bias="none",
task_type="CAUSAL_LM",
target_modules=[
"query_key_value",
"dense",
"dense_h_to_4h",
"dense_4h_to_h",
]
)
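# For reference: LoRA scales the low-rank update by lora_alpha / r, so these settings apply a
# scaling factor of 16 / 64 = 0.25 to each adapted projection (standard LoRA behaviour).
print(f"LoRA scaling factor: {lora_alpha / lora_r}")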
from transformers import TrainingArguments
# Define the directory to save training results
output_dir = "./results"
# Set the batch size per device during training
per_device_train_batch_size = 4
# Number of steps to accumulate gradients before updating the model
gradient_accumulation_steps = 4
# Choose the optimizer type (e.g., "paged_adamw_32bit")
optim = "paged_adamw_32bit"
# Interval to save model checkpoints (every 10 steps)
save_steps = 10
# Interval to log training metrics (every 10 steps)
logging_steps = 10
# Learning rate for optimization
learning_rate = 2e-4
# Maximum gradient norm for gradient clipping
max_grad_norm = 0.3
# Maximum number of training steps
max_steps = 50
# Warmup ratio for learning rate scheduling
warmup_ratio = 0.03
# Type of learning rate scheduler (e.g., "constant")
lr_scheduler_type = "constant"
# Create a TrainingArguments object to configure the training process
training_arguments = TrainingArguments(
output_dir=output_dir,
per_device_train_batch_size=per_device_train_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
optim=optim,
save_steps=save_steps,
logging_steps=logging_steps,
learning_rate=learning_rate,
fp16=True, # Use mixed precision training (16-bit)
max_grad_norm=max_grad_norm,
max_steps=max_steps,
warmup_ratio=warmup_ratio,
group_by_length=True,
lr_scheduler_type=lr_scheduler_type,
)
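# The effective batch size seen by the optimizer is the per-device batch size times the
# gradient-accumulation steps (times the number of devices, assumed to be 1 here).
print(f"Effective batch size: {per_device_train_batch_size * gradient_accumulation_steps}")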
# Build the "text" field used for SFT; include the instruction so each example carries the full prompt, not just input + output
dataset = dataset.map(lambda x: {"text": x["instruction"] + "\n" + x["input"] + "\n" + x["output"]})
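# Quick look at one formatted training example (illustrative only)
print(dataset[0]["text"][:200])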
# Import the SFTTrainer from the TRL library
from trl import SFTTrainer
# Set the maximum sequence length
max_seq_length = 512
# Create a trainer instance using SFTTrainer
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
peft_config=peft_config,
dataset_text_field="text",
max_seq_length=max_seq_length,
tokenizer=tokenizer,
args=training_arguments,
)
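# Optional: when peft_config is passed, SFTTrainer wraps the base model with the LoRA adapters,
# so trainer.model should be a PeftModel; print how few parameters are actually trainable.
trainer.model.print_trainable_parameters()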
# Iterate through the named modules of the trainer's model
for name, module in trainer.model.named_modules():
# Check if the name contains "norm"
if "norm" in name:
# Convert the module to use torch.float32 data type
module = module.to(torch.float32)
trainer.train()
prompt = "Generate a python script to add prime numbers between one and ten"
# Tokenize the prompt and move it to the same device as the model before generating
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=100, temperature=0.7, do_sample=True)
completion = tokenizer.decode(outputs[0])
print(completion)
# Persist the fine-tuned LoRA adapter weights (and tokenizer) so they can be reloaded below
trainer.save_model(output_dir)
# Reload the saved checkpoint from the output directory (a path, not the in-memory model object);
# loading an adapter-only checkpoint this way requires a recent transformers with peft installed
checkpoint_name = output_dir
model = AutoModelForCausalLM.from_pretrained(checkpoint_name)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
prompt = "Create a gradio application that help to convert temperature in celcius into temperature in Fahrenheit"
inputs = tokenizer(f"Question: {prompt}\n\nAnswer: ", return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs["input_ids"],
    do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
    temperature=0.2,
    top_p=0.95,
    max_new_tokens=200,
)
# Number of prompt tokens, used to strip the prompt from the decoded output
input_len = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0][input_len:]))