VanguardAI committed
Commit aeda8f9 · verified · 1 parent: b1f3cf3

Update app.py

Files changed (1):
  1. app.py (+31 / -55)
app.py CHANGED
@@ -5,23 +5,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
 import logging
-from unsloth import FastLanguageModel
+
 import subprocess
 
-# Set up logging for debugging
-logging.basicConfig(
-    level=logging.DEBUG,  # Set the logging level to DEBUG to capture all messages
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler()  # Logs will be output to the console
-    ]
-)
+# Set up logging
 logger = logging.getLogger(__name__)
-logger.info("HELLO WORLD...")
+logger.setLevel(logging.DEBUG)  # Set the logging level
+ch = logging.StreamHandler(gr.Log())  # Create a StreamHandler and send logs to gr.Log
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 
 # Get environment variable for Hugging Face access
-READ_HF = os.environ["read_hf"]
-
+READ_HF = os.environ.get("read_hf")  # use .get to avoid a KeyError if the variable doesn't exist
+logger.info("Checking logger...")
 # Alpaca prompt template
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
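Two things in this hunk deserve a note. First, os.environ.get() returns None instead of raising a KeyError when the secret is missing. Second, logging.StreamHandler expects a file-like object with write() and flush(); whether gr.Log() provides that interface (or exists at all) in the installed Gradio version is worth verifying. A minimal sketch of the env-var pattern with an explicit fallback (the warning message is illustrative, not part of this commit):

import logging
import os

logger = logging.getLogger(__name__)

# .get() returns None instead of raising KeyError when the secret is absent.
READ_HF = os.environ.get("read_hf")
if READ_HF is None:
    # Illustrative warning; the app itself only logs "Checking logger...".
    logger.warning("read_hf is not set; downloads of gated models may fail.")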
@@ -79,7 +76,7 @@ You are an AI assistant tasked with managing inventory based on user instruction
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
-
+from unsloth import FastLanguageModel
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
     # Check for CUDA and NVIDIA-related errors
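Relocating the unsloth import below the prompt definitions keeps it out of the file's opening imports. A common variant on ZeroGPU Spaces, sketched here as an assumption rather than what this commit does, is to import inside the @spaces.GPU()-decorated function so any CUDA initialization triggered by the import happens only once a GPU is attached:

import spaces

@spaces.GPU()
def chunk_it(inventory_list, user_input_text):
    # Importing here defers CUDA side effects of the unsloth import
    # until ZeroGPU has attached a device to this call.
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="VanguardAI/CoT_multi_llama_LoRA_4bit",
        max_seq_length=2048,
    )
    # ... prompt formatting and generation continue as in the hunks below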
@@ -88,16 +85,16 @@ def chunk_it(inventory_list, user_input_text):
     device_count = torch.cuda.device_count()
     logger.info(f"Number of GPU devices: {device_count}")
     if device_count == 0:
-        raise RuntimeError("No GPU devices found.")  # Raise an error if no GPUs are detected
+        raise RuntimeError("No GPU devices found.")
 
     # Check CUDA version using subprocess
     process = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
     cuda_version = process.stdout.strip()
     logger.info(f"CUDA version: {cuda_version}")
     if 'not found' in cuda_version.lower():
-        raise RuntimeError("CUDA not found.")  # Raise an error if CUDA is not found
+        raise RuntimeError("CUDA not found.")
 
-    # Load model and tokenizer (your original code)
+    # Load model and tokenizer
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
         max_seq_length = 2048,
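One caveat with the nvcc probe above: subprocess.run(['nvcc', '--version']) raises FileNotFoundError when nvcc is not on PATH, so the 'not found' string check never gets a chance to fire. A sketch of a probe that degrades gracefully (the helper name is made up for illustration):

import shutil
import subprocess

def nvcc_version():
    """Return the nvcc version banner, or None when the CUDA toolkit is absent."""
    if shutil.which("nvcc") is None:
        return None  # nvcc not on PATH; avoids FileNotFoundError from subprocess.run
    result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True)
    return result.stdout.strip() or None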
@@ -107,33 +104,26 @@
     )
     logger.info("Model and tokenizer loaded.")
 
-    # ... (rest of your code)
-
+    # Format the prompt
     formatted_prompt = alpaca_prompt.format(
-        string + inventory_list,  # instruction
-        user_input_text,  # input
-        "",  # output - leave this blank for generation!
+        string + inventory_list,
+        user_input_text,
+        "",
     )
     logger.debug(f"Formatted prompt: {formatted_prompt}")
-    try:
-        inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
-        logger.debug(f"Tokenized inputs: {inputs}")
-    except Exception as e:
-        logger.error(f"Failed to tokenize inputs: {e}")
-        raise
-    logger.info("Generating output...")
-    try:
-        outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
-        logger.info("Output generated.")
-    except Exception as e:
-        logger.error(f"Failed to generate output: {e}")
-        raise
-    try:
-        reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        logger.debug(f"Decoded output: {reply}")
-    except Exception as e:
-        logger.error(f"Failed to decode output: {e}")
-        raise
+
+    # Tokenize the input
+    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+    logger.debug(f"Tokenized inputs: {inputs}")
+
+    # Generate output
+    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    logger.info("Output generated.")
+
+    # Decode output
+    reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    logger.debug(f"Decoded output: {reply}")
+
     logger.debug(f"Final reply: {reply}")
     return reply
 
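Note that tokenizer.batch_decode returns a list of strings, one per generated sequence, so the reply returned above is a list rather than plain text. Callers that want just the model's answer typically take the first element and split off the generated portion; a sketch, assuming the standard Alpaca template markers:

# batch_decode -> list[str]; take the first (and only) sequence.
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# The decoded text echoes the full prompt, so keep only the part after
# the "### Response:" marker from the Alpaca template.
reply = decoded.split("### Response:")[-1].strip()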
@@ -141,7 +131,6 @@ def chunk_it(inventory_list, user_input_text):
         logger.error(f"Error loading model or CUDA issues: {e}")
         return "There seems to be an issue with CUDA or the model. Please check the Hugging Face Spaces environment."
 
-
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
@@ -153,17 +142,4 @@ iface = gr.Interface(
     title="Testing",
 )
 
-# Set up logging to display in Gradio
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)  # Set the logging level
-ch = logging.StreamHandler(gr.Log())  # Create a StreamHandler and send logs to gr.Log
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-ch.setFormatter(formatter)
-logger.addHandler(ch)
-
-logger.info("Launching Gradio interface...")
-try:
-    iface.launch(inline=False)
-    logger.info("Gradio interface launched.")
-except Exception as e:
-    logger.error(f"Failed to launch Gradio interface: {e}")
+iface.launch(inline=False)
 