VanguardAI committed
Commit d5262d8 · verified · 1 Parent(s): d697408

Update app.py

Files changed (1)
  1. app.py +34 -25
app.py CHANGED
@@ -4,12 +4,9 @@ import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
-READ_HF=os.environ["read_hf"]
+READ_HF = os.environ["read_hf"]
 from unsloth import FastLanguageModel
 
-
-
-
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -64,37 +61,47 @@ You are an AI assistant tasked with managing inventory based on user instruction
 - Pay close attention to the case and spelling of function names and parameters.
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
-
 '''
 
-
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
+    print("Loading model and tokenizer...")
     model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit", # YOUR MODEL YOU USED FOR TRAINING
-        max_seq_length = 2048,
-        dtype = torch.bfloat16,
-        load_in_4bit = True,
-        token= READ_HF
+        model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
+        max_seq_length = 2048,
+        dtype = torch.bfloat16,
+        load_in_4bit = True,
+        token = READ_HF
+    )
+    print("Model and tokenizer loaded.")
+
+    print("Enabling native 2x faster inference...")
+    FastLanguageModel.for_inference(model)
+    print("Inference enabled.")
+
+    formatted_prompt = alpaca_prompt.format(
+        string + inventory_list, # instruction
+        user_input_text, # input
+        "", # output - leave this blank for generation!
     )
-    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-    inputs = tokenizer(
-    [
-        alpaca_prompt.format(
-            string + inventory_list, # instruction
-            user_input_text, # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors="pt").to("cuda")
-
-    # Generation with a longer max_length and better sampling
-    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    print("Formatted prompt: ", formatted_prompt)
+
+    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+    print("Tokenized inputs: ", inputs)
+
+    print("Generating output...")
+    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    print("Output generated.")
 
     reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    print("Decoded output: ", reply)
+
+    # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
-    # # Search for the pattern in the text
-    # match = re.search(pattern, reply[0], re.DOTALL) # re.DOTALL allows '.' to match newlines
+    # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
+
+    print("Final reply: ", reply)
     return reply
 
 # Interface for inputs
@@ -108,4 +115,6 @@ iface = gr.Interface(
     title="Testing",
 )
 
+print("Launching Gradio interface...")
 iface.launch(inline=False)
+print("Gradio interface launched.")
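Both versions read the Hugging Face token with READ_HF = os.environ["read_hf"], which raises a bare KeyError when the Space secret is missing. A minimal defensive sketch, with an illustrative error message that is not part of the app:

import os

# Guarded variant of the token lookup: fail with a clear message instead of
# a bare KeyError when the "read_hf" secret is not configured.
READ_HF = os.environ.get("read_hf")
if READ_HF is None:
    raise RuntimeError("Set the 'read_hf' Space secret to a Hugging Face read token.")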
 
 
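The instruction slot of alpaca_prompt.format is filled with string + inventory_list, where string is presumably the triple-quoted system prompt closed by ''' in the second hunk. That concatenation only works when inventory_list arrives as text; a hypothetical guard (as_text is an illustrative helper, not in the app) in case the Gradio input ever supplies structured data:

import json

# Illustrative only: the instruction must be a single string, so serialize
# a structured inventory before concatenating it onto the system prompt.
def as_text(inventory_list):
    if isinstance(inventory_list, str):
        return inventory_list
    return json.dumps(inventory_list)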
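Finally, the post-processing that stays commented out would trim the decoded output down to the model's answer. A sketch of that step (extract_response is an illustrative name; decoding with skip_special_tokens=True likely strips <|end_of_text|>, so the original pattern may never match and needs a fallback):

import re

def extract_response(decoded: str) -> str:
    # Capture the text between the "### Response:" header and the
    # end-of-text marker; re.DOTALL lets '.' match newlines.
    match = re.search(r"### Response:\n(.*?)<\|end_of_text\|>", decoded, re.DOTALL)
    if match:
        return match.group(1).strip()
    # Fallback: if the marker was stripped during decoding, keep
    # everything after the header instead.
    header = "### Response:"
    idx = decoded.find(header)
    return decoded[idx + len(header):].strip() if idx != -1 else decoded.strip()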