Spaces:

VanguardAI
/

MultiModal_OpenSource_AI

Sleeping

App Files Files Community

VanguardAI committed on Jul 8, 2024

Commit

aeda8f9

verified ·

1 Parent(s): b1f3cf3

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -55

app.py CHANGED Viewed

@@ -5,23 +5,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
 import logging
-from unsloth import FastLanguageModel
 import subprocess
-# Set up logging for debugging
-logging.basicConfig(
-    level=logging.DEBUG,  # Set the logging level to DEBUG to capture all messages
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler()  # Logs will be output to the console
-    ]
-)
 logger = logging.getLogger(__name__)
-logger.info("HELLO WORLD...")
 # Get environment variable for Hugging Face access
-READ_HF = os.environ["read_hf"]
 # Alpaca prompt template
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -79,7 +76,7 @@ You are an AI assistant tasked with managing inventory based on user instruction
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
     # Check for CUDA and NVIDIA-related errors
@@ -88,16 +85,16 @@ def chunk_it(inventory_list, user_input_text):
         device_count = torch.cuda.device_count()
         logger.info(f"Number of GPU devices: {device_count}")
         if device_count == 0:
-            raise RuntimeError("No GPU devices found.")  # Raise an error if no GPUs are detected
         # Check CUDA version using subprocess
         process = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
         cuda_version = process.stdout.strip()
         logger.info(f"CUDA version: {cuda_version}")
         if 'not found' in cuda_version.lower():
-            raise RuntimeError("CUDA not found.")  # Raise an error if CUDA is not found
-        # Load model and tokenizer (your original code)
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
             max_seq_length = 2048,
@@ -107,33 +104,26 @@ def chunk_it(inventory_list, user_input_text):
         )
         logger.info("Model and tokenizer loaded.")
-        # ... (rest of your code)
         formatted_prompt = alpaca_prompt.format(
-            string + inventory_list,  # instruction
-            user_input_text,  # input
-            "",  # output - leave this blank for generation!
         )
         logger.debug(f"Formatted prompt: {formatted_prompt}")
-        try:
-            inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
-            logger.debug(f"Tokenized inputs: {inputs}")
-        except Exception as e:
-            logger.error(f"Failed to tokenize inputs: {e}")
-            raise
-        logger.info("Generating output...")
-        try:
-            outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
-            logger.info("Output generated.")
-        except Exception as e:
-            logger.error(f"Failed to generate output: {e}")
-            raise
-        try:
-            reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-            logger.debug(f"Decoded output: {reply}")
-        except Exception as e:
-            logger.error(f"Failed to decode output: {e}")
-            raise
         logger.debug(f"Final reply: {reply}")
         return reply
@@ -141,7 +131,6 @@ def chunk_it(inventory_list, user_input_text):
         logger.error(f"Error loading model or CUDA issues: {e}")
         return "There seems to be an issue with CUDA or the model. Please check the Hugging Face Spaces environment."
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
@@ -153,17 +142,4 @@ iface = gr.Interface(
     title="Testing",
 )
-# Set up logging to display in Gradio
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)  # Set the logging level
-ch = logging.StreamHandler(gr.Log())  # Create a StreamHandler and send logs to gr.Log
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-ch.setFormatter(formatter)
-logger.addHandler(ch)
-logger.info("Launching Gradio interface...")
-try:
-    iface.launch(inline=False)
-    logger.info("Gradio interface launched.")
-except Exception as e:
-    logger.error(f"Failed to launch Gradio interface: {e}")

 import gradio as gr
 import os
 import logging
 import subprocess
+# Set up logging
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)  # Set the logging level
+ch = logging.StreamHandler(gr.Log())  # Create a StreamHandler and send logs to gr.Log
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)
 # Get environment variable for Hugging Face access
+READ_HF = os.environ.get("read_hf") #use .get to avoid error if variable doesn't exist
+logger.info("Checking logger...")
 # Alpaca prompt template
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
+from unsloth import FastLanguageModel
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
     # Check for CUDA and NVIDIA-related errors
         device_count = torch.cuda.device_count()
         logger.info(f"Number of GPU devices: {device_count}")
         if device_count == 0:
+            raise RuntimeError("No GPU devices found.")
         # Check CUDA version using subprocess
         process = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
         cuda_version = process.stdout.strip()
         logger.info(f"CUDA version: {cuda_version}")
         if 'not found' in cuda_version.lower():
+            raise RuntimeError("CUDA not found.")
+        # Load model and tokenizer
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
             max_seq_length = 2048,
         )
         logger.info("Model and tokenizer loaded.")
+        # Format the prompt
         formatted_prompt = alpaca_prompt.format(
+            string + inventory_list,
+            user_input_text,
+            "",
         )
         logger.debug(f"Formatted prompt: {formatted_prompt}")
+        # Tokenize the input
+        inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+        logger.debug(f"Tokenized inputs: {inputs}")
+        # Generate output
+        outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+        logger.info("Output generated.")
+        # Decode output
+        reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+        logger.debug(f"Decoded output: {reply}")
         logger.debug(f"Final reply: {reply}")
         return reply
         logger.error(f"Error loading model or CUDA issues: {e}")
         return "There seems to be an issue with CUDA or the model. Please check the Hugging Face Spaces environment."
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
     title="Testing",
 )
+iface.launch(inline=False)