VanguardAI committed
Commit d5262d8 · verified · 1 Parent(s): d697408

Update app.py

Files changed (1)
  1. app.py +34 -25
app.py CHANGED
@@ -4,12 +4,9 @@ import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
-READ_HF=os.environ["read_hf"]
+READ_HF = os.environ["read_hf"]
 from unsloth import FastLanguageModel
 
-
-
-
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -64,37 +61,47 @@ You are an AI assistant tasked with managing inventory based on user instruction
 - Pay close attention to the case and spelling of function names and parameters.
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
-
 '''
 
-
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
+    print("Loading model and tokenizer...")
     model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit", # YOUR MODEL YOU USED FOR TRAINING
-        max_seq_length = 2048,
-        dtype = torch.bfloat16,
-        load_in_4bit = True,
-        token= READ_HF
+        model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
+        max_seq_length = 2048,
+        dtype = torch.bfloat16,
+        load_in_4bit = True,
+        token = READ_HF
+    )
+    print("Model and tokenizer loaded.")
+
+    print("Enabling native 2x faster inference...")
+    FastLanguageModel.for_inference(model)
+    print("Inference enabled.")
+
+    formatted_prompt = alpaca_prompt.format(
+        string + inventory_list, # instruction
+        user_input_text, # input
+        "", # output - leave this blank for generation!
     )
-    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-    inputs = tokenizer(
-    [
-        alpaca_prompt.format(
-            string + inventory_list, # instruction
-            user_input_text, # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors="pt").to("cuda")
-
-    # Generation with a longer max_length and better sampling
-    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    print("Formatted prompt: ", formatted_prompt)
+
+    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+    print("Tokenized inputs: ", inputs)
+
+    print("Generating output...")
+    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+    print("Output generated.")
 
     reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    print("Decoded output: ", reply)
+
+    # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
-    # # Search for the pattern in the text
-    # match = re.search(pattern, reply[0], re.DOTALL) # re.DOTALL allows '.' to match newlines
+    # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
+
+    print("Final reply: ", reply)
     return reply
 
 # Interface for inputs
@@ -108,4 +115,6 @@ iface = gr.Interface(
     title="Testing",
 )
 
+print("Launching Gradio interface...")
 iface.launch(inline=False)
+print("Gradio interface launched.")
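Both versions read the Hugging Face token with READ_HF = os.environ["read_hf"], which raises a bare KeyError when the Space secret is missing. A minimal defensive sketch, with an illustrative error message that is not part of the app:

import os

# Guarded variant of the token lookup: fail with a clear message instead of
# a bare KeyError when the "read_hf" secret is not configured.
READ_HF = os.environ.get("read_hf")
if READ_HF is None:
    raise RuntimeError("Set the 'read_hf' Space secret to a Hugging Face read token.")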
 
 
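The instruction slot of alpaca_prompt.format is filled with string + inventory_list, where string is presumably the triple-quoted system prompt closed by ''' in the second hunk. That concatenation only works when inventory_list arrives as text; a hypothetical guard (as_text is an illustrative helper, not in the app) in case the Gradio input ever supplies structured data:

import json

# Illustrative only: the instruction must be a single string, so serialize
# a structured inventory before concatenating it onto the system prompt.
def as_text(inventory_list):
    if isinstance(inventory_list, str):
        return inventory_list
    return json.dumps(inventory_list)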
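Finally, the post-processing that stays commented out would trim the decoded output down to the model's answer. A sketch of that step (extract_response is an illustrative name; decoding with skip_special_tokens=True likely strips <|end_of_text|>, so the original pattern may never match and needs a fallback):

import re

def extract_response(decoded: str) -> str:
    # Capture the text between the "### Response:" header and the
    # end-of-text marker; re.DOTALL lets '.' match newlines.
    match = re.search(r"### Response:\n(.*?)<\|end_of_text\|>", decoded, re.DOTALL)
    if match:
        return match.group(1).strip()
    # Fallback: if the marker was stripped during decoding, keep
    # everything after the header instead.
    header = "### Response:"
    idx = decoded.find(header)
    return decoded[idx + len(header):].strip() if idx != -1 else decoded.strip()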