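# Fine-tune LLaMA 2 on text extracted from a PDF, then serve it as a Gradio chatbot.
# Assumed dependencies (install as needed): pymupdf, datasets, transformers, peft,
# bitsandbytes, accelerate, gradio.

# --- Step 1: Extract and clean text from the PDF ---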
import fitz  # PyMuPDF for PDF extraction
import re

def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file"""
    doc = fitz.open(pdf_path)
    text = "\n".join(page.get_text("text") for page in doc)
    doc.close()
    return text.strip()

def preprocess_text(text):
    """Basic text preprocessing"""
    return re.sub(r"\s+", " ", text).strip()

pdf_text = extract_text_from_pdf("your_document.pdf")
clean_text = preprocess_text(pdf_text)

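# --- Step 2: Build a Hugging Face dataset and tokenize the text ---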
from datasets import Dataset
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"  # Or use the chat-tuned variant "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # LLaMA has no pad token by default; reuse EOS so padding works

# Create dataset
data = {"text": [clean_text]}
dataset = Dataset.from_dict(data)

# Tokenization function
def tokenize_function(examples):
    tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()  # Use input as labels for text generation
    return tokens

tokenized_datasets = dataset.map(tokenize_function, batched=True)

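# --- Step 3: Load the base model in 4-bit and attach LoRA adapters ---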
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load LLaMA 2 in 4-bit mode to save memory (requires bitsandbytes; newer
# transformers versions prefer passing a BitsAndBytesConfig instead)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # Use 4-bit quantization for efficiency
    device_map="auto"
)
model = prepare_model_for_kbit_training(model)  # Prepare the quantized model for adapter training

# Apply LoRA (efficient fine-tuning)
lora_config = LoraConfig(
    r=8,  # Low-rank dimension
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Apply only to the attention projections
    lora_dropout=0.05,
    task_type="CAUSAL_LM"  # Tell PEFT this is a causal language model
)

model = get_peft_model(model, lora_config)

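# --- Step 4: Configure and run training ---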
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-4,
    per_device_train_batch_size=1,  # Small batch size for memory efficiency
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    # No evaluation settings: there is no eval split, and enabling
    # evaluation_strategy without an eval_dataset makes Trainer raise an error
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    tokenizer=tokenizer,
)

trainer.train()

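# Saving a PEFT-wrapped model stores only the LoRA adapter weights, not the full base model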
model.save_pretrained("./fine_tuned_llama2")
tokenizer.save_pretrained("./fine_tuned_llama2")

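# --- Step 5: Serve the fine-tuned model as a simple Gradio chatbot ---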
import gradio as gr
from transformers import pipeline

# Loading the adapter directory with pipeline requires peft installed and access to the base model
chatbot = pipeline("text-generation", model="./fine_tuned_llama2", tokenizer="./fine_tuned_llama2")

def chatbot_response(prompt):
    result = chatbot(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
    return result[0]["generated_text"]

iface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text")
iface.launch()