kodetr committed
Commit 93403ab · verified · 1 Parent(s): 3571b0e

Update app.py

Files changed (1)
  1. app.py +28 -10
app.py CHANGED
@@ -33,12 +33,26 @@ h3 {
 }
 """
 
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# -------------------------------------
+# ------- use model stunting V5 -------
+# -------------------------------------
+
+text_pipeline = pipeline(
+    "text-generation",
+    model=MODEL_ID,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+# -------------------------------------
+# ------- use model stunting V6 -------
+# -------------------------------------
+
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_ID,
+#     torch_dtype=torch.bfloat16,
+#     device_map="auto",
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
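
For context: this hunk swaps the explicit AutoModelForCausalLM / AutoTokenizer pair for a single transformers text-generation pipeline, which loads both pieces itself and still exposes the tokenizer as text_pipeline.tokenizer (the hunks below rely on that attribute). A minimal sketch of the equivalence, assuming a placeholder checkpoint name (app.py defines the real MODEL_ID):

import torch
from transformers import pipeline

MODEL_ID = "org/checkpoint"  # placeholder; the real value comes from app.py

# One object now owns both model and tokenizer; bfloat16 weights and
# automatic device placement match the removed from_pretrained() call.
text_pipeline = pipeline(
    "text-generation",
    model=MODEL_ID,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

assert text_pipeline.tokenizer is not None  # used later for eos/<|eot_id|> lookups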
@@ -51,7 +65,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-    # ------- use model stunting V5 -------
+    # -------------------------------------
+    # ------- use model stunting V5 -------
+    # -------------------------------------
+
     terminators = [
         text_pipeline.tokenizer.eos_token_id,
         text_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
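
The terminators list pairs the tokenizer's generic eos_token_id with the Llama-3-style <|eot_id|> end-of-turn token, so generation stops at whichever appears first. The diff does not show the generation call between this hunk and the next; a plausible sketch of that elided region, assuming conversation is a list of {"role": ..., "content": ...} dicts and that the sampling arguments map one-to-one onto the function's parameters:

prompt = text_pipeline.tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
outputs = text_pipeline(
    prompt,
    max_new_tokens=max_new_tokens,
    eos_token_id=terminators,        # stop on eos OR <|eot_id|>
    do_sample=True,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=penalty,
)
full_text = outputs[0]["generated_text"][len(prompt):]  # drop the echoed prompt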
@@ -75,9 +92,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     for part in full_text.split(". "):  # stream sentence by sentence
         buffer += part.strip() + ". "
         yield buffer
-
-    # ------- use model stunting V6 -------
-
+
+    # -------------------------------------
+    # ------- use model stunting V6 -------
+    # -------------------------------------
 
     # input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     # inputs = tokenizer(input_ids, return_tensors="pt").to(0) #gpu 0, cpu 1
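
Worth noting: the pipeline returns the whole completion at once, so this loop only simulates streaming by yielding an ever-growing buffer, one ". "-delimited piece at a time (each yielded value presumably replaces the displayed reply in the Gradio chat). A standalone illustration of that behaviour; sentences ending in "?", "!", or a newline are not split, and every chunk gains a trailing ". ":

def fake_stream(full_text: str):
    buffer = ""
    for part in full_text.split(". "):  # same split as the diff above
        buffer += part.strip() + ". "
        yield buffer

for chunk in fake_stream("First sentence. Second one. Done"):
    print(chunk)
# First sentence.
# First sentence. Second one.
# First sentence. Second one. Done.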
 