import os
import re

import fitz  # PyMuPDF, used for PDF text extraction
import unsloth  # unsloth recommends importing it before transformers so its patches apply
import gradio as gr
from huggingface_hub import login
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    pipeline,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file."""
    doc = fitz.open(pdf_path)
    text = "\n".join(page.get_text("text") for page in doc)
    return text.strip()


def preprocess_text(text):
    """Basic text preprocessing: collapse whitespace runs into single spaces."""
    return re.sub(r"\s+", " ", text).strip()


pdf_text = extract_text_from_pdf("new-american-standard-bible.pdf")
clean_text = preprocess_text(pdf_text)

# Read the Hugging Face token from environment variables
hf_token = os.getenv("access_token")
if hf_token is None:
    raise ValueError("'access_token' is not set. Add it as a secret variable in Hugging Face Spaces.")

# Log in to Hugging Face
login(token=hf_token)

# model_name = "meta-llama/Llama-2-7b-hf"  # gated upstream checkpoint; requires approved access
model_name = "unsloth/llama-2-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def chunk_text(text, chunk_size=400):
    """Split the corpus into word chunks so training sees more than the first 512 tokens."""
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


# Create dataset (one example per chunk; a single giant string would be truncated to 512 tokens)
dataset = Dataset.from_dict({"text": chunk_text(clean_text)})

# LLaMA has no padding token; reuse EOS as PAD
tokenizer.pad_token = tokenizer.eos_token
# Alternatively, add a new custom pad token:
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})


# Tokenization function
def tokenize_function(examples):
    tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()  # causal LM: labels mirror the inputs
    return tokens


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Load LLaMA 2 in 4-bit mode to save memory
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # 4-bit quantization for efficiency
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)  # recommended prep step before LoRA on a quantized model

# Apply LoRA (parameter-efficient fine-tuning)
lora_config = LoraConfig(
    r=8,  # low-rank dimension
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # adapt only the attention projections
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="no",  # disable evaluation (set to "epoch" to enable)
    learning_rate=2e-4,
    per_device_train_batch_size=1,  # small batch size for memory efficiency
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    tokenizer=tokenizer,
)

trainer.train()

# Save the LoRA adapter and tokenizer
model.save_pretrained("./fine_tuned_llama2")
tokenizer.save_pretrained("./fine_tuned_llama2")

# CHATBOT START
# The saved directory holds the adapter and tokenizer; with peft installed, transformers
# resolves the base model from adapter_config.json when loading the pipeline.
chatbot = pipeline("text-generation", model="./fine_tuned_llama2")


def chatbot_response(prompt):
    result = chatbot(prompt, max_length=100, do_sample=True, temperature=0.7)
    return result[0]["generated_text"]


iface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text")
iface.launch()