VanguardAI committed on
Commit 5b88db5 · verified · 1 Parent(s): 6b2c16b

Update app.py

Files changed (1)
  1. app.py +4 -22
app.py CHANGED
@@ -6,8 +6,6 @@ import gradio as gr
 import os
 import logging
 from unsloth import FastLanguageModel
-
-# Set up logging
 logging.basicConfig(
     level=logging.DEBUG,  # Set the logging level to DEBUG to capture all messages
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -16,9 +14,7 @@ logging.basicConfig(
     ]
 )
 logger = logging.getLogger(__name__)
-
 READ_HF = os.environ["read_hf"]
-
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -29,7 +25,6 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 
 ### Response:
 {}"""
-
 string = '''
 You are an AI assistant tasked with managing inventory based on user instructions. You must meticulously analyze each user request to determine the appropriate action and execute it with the correct parameters.
 
@@ -74,14 +69,11 @@ You are an AI assistant tasked with managing inventory based on user instruction
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
-
 @spaces.GPU()
-num_elements = (5 * 1024 * 1024) // 4
-# Create a tensor with the calculated number of elements
-tensor = torch.randn(num_elements, dtype=torch.float32)
-# Move the tensor to the GPU
-tensor_gpu = tensor.to('cuda')
 def chunk_it(inventory_list, user_input_text):
+    num_elements = (5 * 1024 * 1024) // 4
+    tensor = torch.randn(num_elements, dtype=torch.float32)
+    tensor_gpu = tensor.to('cuda')
     logger.info("Loading model and tokenizer...")
     try:
         model, tokenizer = FastLanguageModel.from_pretrained(
@@ -95,7 +87,6 @@ def chunk_it(inventory_list, user_input_text):
     except Exception as e:
         logger.error(f"Failed to load model and tokenizer: {e}")
         raise
-
     logger.info("Enabling native 2x faster inference...")
     try:
         FastLanguageModel.for_inference(model)
@@ -103,21 +94,18 @@ def chunk_it(inventory_list, user_input_text):
     except Exception as e:
         logger.error(f"Failed to enable native inference: {e}")
         raise
-
     formatted_prompt = alpaca_prompt.format(
         string + inventory_list,  # instruction
         user_input_text,  # input
         "",  # output - leave this blank for generation!
     )
     logger.debug(f"Formatted prompt: {formatted_prompt}")
-
     try:
         inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
         logger.debug(f"Tokenized inputs: {inputs}")
     except Exception as e:
         logger.error(f"Failed to tokenize inputs: {e}")
         raise
-
     logger.info("Generating output...")
     try:
         outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
@@ -125,22 +113,17 @@ def chunk_it(inventory_list, user_input_text):
     except Exception as e:
         logger.error(f"Failed to generate output: {e}")
         raise
-
     try:
         reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
         logger.debug(f"Decoded output: {reply}")
     except Exception as e:
         logger.error(f"Failed to decode output: {e}")
         raise
-
-    # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
     # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
-
     logger.debug(f"Final reply: {reply}")
     return reply
-
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
@@ -151,10 +134,9 @@ iface = gr.Interface(
     outputs=gr.Textbox(label="output", lines=23),
     title="Testing",
 )
-
 logger.info("Launching Gradio interface...")
 try:
     iface.launch(inline=False)
     logger.info("Gradio interface launched.")
 except Exception as e:
-    logger.error(f"Failed to launch Gradio interface: {e}")
+    logger.error(f"Failed to launch Gradio interface: {e}")
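
For context on the pattern this commit adopts: in the previous revision, the tensor assignments sat between the @spaces.GPU() decorator and the def line, which is invalid Python (a decorator must be immediately followed by a function or class definition). Moving them into the function body also matters on a ZeroGPU Space, where CUDA is only attached while a @spaces.GPU-decorated function is running. A minimal sketch of the corrected pattern, assuming a ZeroGPU Space; the function name warm_up is illustrative, not from the commit:

import spaces
import torch

@spaces.GPU()  # on ZeroGPU hardware, a GPU is attached only while this function runs
def warm_up():
    # Allocate ~5 MiB of float32 values (4 bytes each), as in the commit
    num_elements = (5 * 1024 * 1024) // 4
    tensor = torch.randn(num_elements, dtype=torch.float32)
    tensor_gpu = tensor.to('cuda')  # valid here; at module level on ZeroGPU this would fail
    return tensor_gpu.sum().item()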