import os

import torch
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Token for the gated Llama 3 base model (set as a Space secret)
HF_TOKEN = os.environ["HF_TOKEN"]

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
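
# Llama 3 tokenizers ship without a dedicated pad token; reusing EOS is a
# common workaround so padded batches tokenize without errors.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token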

# Configuration for 4-bit quantization
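# NF4 with double quantization keeps the 8B weights at roughly 5 GB of VRAM,
# while bfloat16 compute keeps the matmuls in a wider dtype for stability.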
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base model in 4-bit; device_map="auto" lets accelerate place it on
# the available GPU (a quantized model cannot be moved with .to("cuda")).
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
    token=HF_TOKEN,  # `use_auth_token` is deprecated in favor of `token`
)

# Attach the LoRA adapters. The adapter repo stores its own LoraConfig, so a
# hand-built config is unnecessary here (and would override the saved one).
model = PeftModel.from_pretrained(
    base_model,
    "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters",
)
model.eval()

# Output-format constraint appended to the end of every instruction.
condition = '''
ALWAYS provide output in a JSON format.
'''
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


@spaces.GPU(duration=300)
def chunk_it(inventory_list, user_input_text):
    # Model weights are already on the GPU via device_map="auto"; calling
    # .to("cuda") on a 4-bit bitsandbytes model raises an error.
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                '''
                You will receive text input that you need to analyze to perform the following tasks:

                transaction: Record the details of an item transaction.
                last n days transactions: Retrieve transaction records for a specified time period.
                view risk inventory: View inventory items based on a risk category.
                view inventory: View inventory details.
                new items: Add new items to the inventory.
                report generation: Generate various inventory reports.
                delete item: Delete an existing item.

                Required Parameters:
                Each task requires specific parameters to execute correctly:

                transaction:
                  ItemName (string)
                  ItemQt (quantity - integer)
                  Type (string: "sale" or "purchase" or "return")
                  ReorderPoint (integer)
                last n days transactions:
                  ItemName (string)
                  Duration (integer: number of days; if the user input is in weeks, months, or years, convert it to days)
                view risk inventory:
                  RiskType (string: "overstock", "understock", or "Null" for all risk types)
                view inventory:
                  ItemName (string)
                new items:
                  ItemName (string)
                  SellingPrice (number)
                  CostPrice (number)
                report generation:
                  ItemName (string)
                  Duration (integer: number of days; if the user input is in weeks, months, or years, convert it to days)
                  ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)

                The ItemName must always be matched against the list of names below, EXCEPT when the Function is "new items".
                ''' + inventory_list + condition,  # instruction
                user_input_text,  # input
                "",  # output - leave this blank for generation!
            )
        ], return_tensors="pt").to(model.device)  # follow the model's placement

    # Greedy decoding; max_new_tokens bounds the length of the JSON response
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)

    # Decode only the newly generated tokens, not the echoed prompt
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
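
# Note: the model is only prompted to return JSON; callers should still
# validate the result (e.g. with json.loads) before relying on its structure.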

# Gradio UI. Input order must match chunk_it's positional parameters
# (inventory_list first, then user_input_text).
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs="text",
    title="Formatter Pro",
)
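
# Quick smoke test without the UI (hypothetical inputs, requires a GPU session):
#   print(chunk_it("Chair\nTable\nDesk", "sold 2 chairs today"))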

iface.launch(inline=False)