kodetr committed
Commit 93403ab · verified · 1 Parent(s): 3571b0e

Update app.py

Files changed (1)
  1. app.py +28 -10
app.py CHANGED
@@ -33,12 +33,26 @@ h3 {
 }
 """
 
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# -------------------------------------
+# ------- use model stunting V5 -------
+# -------------------------------------
+
+text_pipeline = pipeline(
+    "text-generation",
+    model=MODEL_ID,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+# -------------------------------------
+# ------- use model stunting V6 -------
+# -------------------------------------
+
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_ID,
+#     torch_dtype=torch.bfloat16,
+#     device_map="auto",
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
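
For context: this hunk swaps the explicit AutoModelForCausalLM / AutoTokenizer pair for a single transformers text-generation pipeline, which loads both pieces itself and still exposes the tokenizer as text_pipeline.tokenizer (the hunks below rely on that attribute). A minimal sketch of the equivalence, assuming a placeholder checkpoint name (app.py defines the real MODEL_ID):

import torch
from transformers import pipeline

MODEL_ID = "org/checkpoint"  # placeholder; the real value comes from app.py

# One object now owns both model and tokenizer; bfloat16 weights and
# automatic device placement match the removed from_pretrained() call.
text_pipeline = pipeline(
    "text-generation",
    model=MODEL_ID,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

assert text_pipeline.tokenizer is not None  # used later for eos/<|eot_id|> lookups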
@@ -51,7 +65,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-    # ------- use model stunting V5 -------
+    # -------------------------------------
+    # ------- use model stunting V5 -------
+    # -------------------------------------
+
     terminators = [
         text_pipeline.tokenizer.eos_token_id,
         text_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
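
The terminators list pairs the tokenizer's generic eos_token_id with the Llama-3-style <|eot_id|> end-of-turn token, so generation stops at whichever appears first. The diff does not show the generation call between this hunk and the next; a plausible sketch of that elided region, assuming conversation is a list of {"role": ..., "content": ...} dicts and that the sampling arguments map one-to-one onto the function's parameters:

prompt = text_pipeline.tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
outputs = text_pipeline(
    prompt,
    max_new_tokens=max_new_tokens,
    eos_token_id=terminators,        # stop on eos OR <|eot_id|>
    do_sample=True,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=penalty,
)
full_text = outputs[0]["generated_text"][len(prompt):]  # drop the echoed prompt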
@@ -75,9 +92,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     for part in full_text.split(". "):  # stream sentence by sentence
         buffer += part.strip() + ". "
         yield buffer
-
-    # ------- use model stunting V6 -------
-
+
+    # -------------------------------------
+    # ------- use model stunting V6 -------
+    # -------------------------------------
 
     # input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     # inputs = tokenizer(input_ids, return_tensors="pt").to(0) #gpu 0, cpu 1
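
Worth noting: the pipeline returns the whole completion at once, so this loop only simulates streaming by yielding an ever-growing buffer, one ". "-delimited piece at a time (each yielded value presumably replaces the displayed reply in the Gradio chat). A standalone illustration of that behaviour; sentences ending in "?", "!", or a newline are not split, and every chunk gains a trailing ". ":

def fake_stream(full_text: str):
    buffer = ""
    for part in full_text.split(". "):  # same split as the diff above
        buffer += part.strip() + ". "
        yield buffer

for chunk in fake_stream("First sentence. Second one. Done"):
    print(chunk)
# First sentence.
# First sentence. Second one.
# First sentence. Second one. Done.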
 