import fitz  # PyMuPDF for PDF extraction
import re
import unsloth
import os
from huggingface_hub import login
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import gradio as gr
from transformers import pipeline
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from trl import SFTTrainer

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file."""
    doc = fitz.open(pdf_path)
    text = "\n".join([page.get_text("text") for page in doc])
    return text.strip()


def preprocess_text(text):
    """Basic text preprocessing: collapse whitespace."""
    return re.sub(r"\s+", " ", text).strip()


pdf_text = extract_text_from_pdf("new-american-standard-bible.pdf")
clean_text = preprocess_text(pdf_text)

# Read the Hugging Face token from environment variables
hf_token = os.getenv("access_token")

if hf_token is None:
    raise ValueError("'access_token' is not set. Add it as a secret variable in Hugging Face Spaces.")

# Log in to Hugging Face
login(token=hf_token)

# model_name = "meta-llama/Llama-2-7b-hf"  # You can use a smaller one like "meta-llama/Llama-2-7b-chat-hf"
model_name = "unsloth/llama-2-7b-chat"
# tokenizer = AutoTokenizer.from_pretrained(model_name)

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
)

# Attach LoRA adapters using Unsloth's default PEFT configuration
model = FastLanguageModel.get_peft_model(model)

# Create a one-example dataset from the extracted PDF text
data = {"text": [clean_text]}
dataset = Dataset.from_dict(data)

# Set a padding token manually
tokenizer.pad_token = tokenizer.eos_token  # Use EOS as PAD token
# Alternatively, add a new custom pad token:
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Tokenization function
def tokenize_function(examples):
    tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()  # Use input as labels for causal LM training
    return tokens

# NOTE: SFTTrainer tokenizes the raw `dataset` itself via `dataset_text_field`,
# so this pre-tokenized copy is not passed to the trainer below.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,  # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1,  # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",  # Use this for WandB etc.
    ),
)


def perform_training():
    trainer.train()


perform_training()

# Save the fine-tuned adapter and tokenizer locally
model.save_pretrained("./llama-2-7b-chat_fine_tuned")
tokenizer.save_pretrained("./llama-2-7b-chat_fine_tuned")

# Push them to the Hugging Face Hub
model.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
tokenizer.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)

# Save GGUF versions (default, f16, and q4_k_m quantization)
model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer)
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, token=hf_token)

model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16")
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16", token=hf_token)

model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m")
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m", token=hf_token)

# CHATBOT START
# chatbot = pipeline("text-generation", model="jonACE/llama-2-7b-chat_fine_tuned")

# def chatbot_response(prompt):
#     result = chatbot(prompt, max_length=100, do_sample=True, temperature=0.7)
#     return result[0]["generated_text"]

# iface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text")
# iface.launch()
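
# --- Optional sanity check: generate from the freshly fine-tuned model ---
# A minimal sketch, not part of the original script: it reuses the in-memory `model`
# and `tokenizer` from the training run above, and the prompt text is illustrative only.
FastLanguageModel.for_inference(model)  # switch Unsloth to its faster inference mode
sample_prompt = "In the beginning"
inputs = tokenizer(sample_prompt, return_tensors="pt").to("cuda")
output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=True, temperature=0.7)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))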